From ec4bb3110930c7dbcef9874d04037de9b3622ba6 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Fri, 2 Jun 2023 17:52:55 +0100
Subject: [PATCH 01/20] early draft of epidemic risk vignette

---
 vignettes/epidemic_risk.Rmd | 86 +++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 vignettes/epidemic_risk.Rmd

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
new file mode 100644
index 0000000..4d52c6b
--- /dev/null
+++ b/vignettes/epidemic_risk.Rmd
@@ -0,0 +1,86 @@
+---
+title: "Epidemic Risk"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Epidemic Risk}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+During the early stages of an infectious disease outbreak information on transmission
+clusters can provide insight into the occurrence of superspreading events and from this
+the probability an outbreak will cause an epidemic. This has implications for policy
+decisions whether control interventions are required to bring disease transmission 
+under control (e.g. $R$ < 1) and increase the probability that an outbreak goes extinct.
+
+This vignette explores ...
+
+```{r setup}
+library(superspreading)
+library(ggplot2)
+```
+
+## Plot from Kucharski et al (2020)
+
+```{r}
+epidemic_params <- expand.grid(
+  R = 2.35, R_lw = 1.5, R_up = 3.2, k = seq(0, 1, 0.1), a = seq(0, 10, 1)
+)
+# results are transposed to pivot to long rather than wide data
+prob_epidemic <- t(apply(epidemic_params, 1, function(x) {
+  median <- probability_epidemic(R = x[["R"]], k = x[["k"]], a = x[["a"]])
+  lower <- probability_epidemic(R = x[["R_lw"]], k = x[["k"]], a = x[["a"]])
+  upper <- probability_epidemic(R = x[["R_up"]], k = x[["k"]], a = x[["a"]])
+  c(prob_epidemic = median, prob_epidemic_lw = lower, prob_epidemic_up = upper)
+})) 
+epidemic_params <- cbind(epidemic_params, prob_epidemic)
+
+# subset data for a single initial infection
+homogeneity <- subset(epidemic_params, a == 1)
+
+# plot probability of epidemic across dispersion
+ggplot2::ggplot(data = homogeneity) +
+  ggplot2::geom_ribbon(mapping = ggplot2::aes(x = k, ymin = prob_epidemic_lw, ymax = prob_epidemic_up), fill = "grey70") +
+  ggplot2::geom_line(mapping = ggplot2::aes(x = k, y = prob_epidemic)) +
+  ggplot2::scale_y_continuous(
+    name = "Probability of large outbreak", 
+    limits = c(0, 1)
+  ) +
+  ggplot2::scale_x_continuous(name = "Extent of homogeneity in transmission") +
+  ggplot2::theme_bw()
+```
+
+```{r}
+introductions <- subset(epidemic_params, k == 0.5)
+ggplot2::ggplot(data = introductions) +
+  ggplot2::geom_pointrange(
+    mapping = ggplot2::aes(
+      x = a, 
+      y = prob_epidemic, 
+      ymin = prob_epidemic_lw, 
+      ymax = prob_epidemic_up
+    )
+  ) +
+  ggplot2::scale_y_continuous(
+    name = "Probability of large outbreak", 
+    limits = c(0, 1)
+  ) +
+  ggplot2::scale_x_continuous(name = "Number of introductions", n.breaks = 6) +
+  ggplot2::theme_bw()
+```
+
+## Plot from CMMID dashboard
+
+## Plot from Lloyd-Smith et al (2005)
+
+## Link to policy report
+
+
+

From f953e98db4437dfd5a7abd05ed846b2ff63bbcb7 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 7 Jun 2023 14:13:47 +0100
Subject: [PATCH 02/20] updated epidemic_risk vignette

---
 vignettes/epidemic_risk.Rmd | 150 +++++++++++++++++++++++++++++++++---
 1 file changed, 141 insertions(+), 9 deletions(-)

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index 4d52c6b..fecfef4 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -1,6 +1,12 @@
 ---
 title: "Epidemic Risk"
-output: rmarkdown::html_vignette
+output: 
+  bookdown::html_vignette2:
+    code_folding: show
+pkgdown:
+   as_is: true
+bibliography: references.json
+link-citations: true
 vignette: >
   %\VignetteIndexEntry{Epidemic Risk}
   %\VignetteEngine{knitr::rmarkdown}
@@ -20,16 +26,27 @@ the probability an outbreak will cause an epidemic. This has implications for po
 decisions whether control interventions are required to bring disease transmission 
 under control (e.g. $R$ < 1) and increase the probability that an outbreak goes extinct.
 
-This vignette explores ...
+A recent example of where individual-level transmission and superspreading was analysed
+to understand disease transmission was by the [Scientific Advisory Group for Emergencies 
+(SAGE)](https://www.gov.uk/government/organisations/scientific-advisory-group-for-emergencies) 
+for the [UK's response COVID-19](https://www.gov.uk/government/publications/analysis-of-sars-cov-2-transmission-clusters-and-superspreading-events-3-june-2020). 
+
+This vignette explores the applications of the functions included the {superspreading} package and their visualiation in understanding and informing decision makers for a variety of outbreak scenarios.
 
 ```{r setup}
 library(superspreading)
 library(ggplot2)
 ```
 
-## Plot from Kucharski et al (2020)
+Early in the COVID-19 epidemic @kucharskiEarlyDynamicsTransmission2020 investigated
+whether data on SARS-CoV-2 transmission displayed signatures of overdispersion (i.e.
+evidence of superspreading events). This was in light of evidence that other corona virus
+outbreaks (Severe acute respiratory syndrome (SARS) and Middle East respiratory syndrome (MERS))
+had experienced superspreading events. The detection of variability in individual-transmission
+is important in understanding or predicting the likelihood of transmission establishing
+when imported into a new location (e.g. country). 
 
-```{r}
+```{r, prep-dispersion-plot}
 epidemic_params <- expand.grid(
   R = 2.35, R_lw = 1.5, R_up = 3.2, k = seq(0, 1, 0.1), a = seq(0, 10, 1)
 )
@@ -44,11 +61,24 @@ epidemic_params <- cbind(epidemic_params, prob_epidemic)
 
 # subset data for a single initial infection
 homogeneity <- subset(epidemic_params, a == 1)
+```
 
+```{r, plot-dispersion, class.source = 'fold-hide', fig.cap="The probability that an initial infection (introduction) will cause a sustained outbreak (transmission chain). The dispersion of the individual-level transmission is plotted on the x-axis and probability of outbreak -- calculated using `probability_epidemic()` -- is on the y-axis. This plot is reproduced from Kucharski et al. (2020) figure 3A.", fig.width = 8, fig.height = 5}
 # plot probability of epidemic across dispersion
 ggplot2::ggplot(data = homogeneity) +
-  ggplot2::geom_ribbon(mapping = ggplot2::aes(x = k, ymin = prob_epidemic_lw, ymax = prob_epidemic_up), fill = "grey70") +
+  ggplot2::geom_ribbon(
+    mapping = ggplot2::aes(
+      x = k, 
+      ymin = prob_epidemic_lw, 
+      ymax = prob_epidemic_up
+    ), 
+    fill = "grey70"
+  ) +
   ggplot2::geom_line(mapping = ggplot2::aes(x = k, y = prob_epidemic)) +
+  ggplot2::geom_vline(mapping = ggplot2::aes(xintercept = 0.2), linetype = "dashed") +
+  ggplot2::annotate(geom = "text", x = 0.15, y = 0.75, label= "SARS") + 
+  ggplot2::geom_vline(mapping = ggplot2::aes(xintercept = 0.4), linetype = "dashed") +
+  ggplot2::annotate(geom = "text", x = 0.45, y = 0.75, label = "MERS") +
   ggplot2::scale_y_continuous(
     name = "Probability of large outbreak", 
     limits = c(0, 1)
@@ -57,8 +87,21 @@ ggplot2::ggplot(data = homogeneity) +
   ggplot2::theme_bw()
 ```
 
-```{r}
+The degree of variability in transmission increases as $k$ decreases. So the probability
+of a large outbreak is smaller for smaller values of $k$, meaning that if SARS-CoV-2
+is more similar to SARS than MERS it will be less likely to establish and cause an 
+outbreak if introduced into a newly susceptible population.
+
+However, this probability of establishment and continued transmission will also
+depend on the number of initial introductions to that populations. The more
+introductions, the higher the chance one will lead to an epidemic.
+
+```{r, prep-introductions-plot}
 introductions <- subset(epidemic_params, k == 0.5)
+```
+
+```{r, plot-introductions, class.source = 'fold-hide', fig.cap="The probability that an a number of introduction events will cause a sustained outbreak (transmission chain). The number of disease introductions is plotted on the x-axis and probability of outbreak -- calculated using `probability_epidemic()` -- is on the y-axis. This plot is reproduced from Kucharski et al. (2020) figure 3B.", fig.width = 8, fig.height = 5}
+# plot probability of epidemic across introductions
 ggplot2::ggplot(data = introductions) +
   ggplot2::geom_pointrange(
     mapping = ggplot2::aes(
@@ -76,11 +119,100 @@ ggplot2::ggplot(data = introductions) +
   ggplot2::theme_bw()
 ```
 
-## Plot from CMMID dashboard
+Different levels of heterogeneity in transmission will produce different probabilities of epidemics.
 
-## Plot from Lloyd-Smith et al (2005)
+```{r, plot-introductions-multi-k, class.source = 'fold-hide', fig.cap="The probability that an a number of introduction events will cause a sustained outbreak (transmission chain). The number of disease introductions is plotted on the x-axis and probability of outbreak -- calculated using `probability_epidemic()` -- is on the y-axis. Different values of dispersion are plotted to show the effect of increased transmission variability on an epidemic establishing", fig.width = 8, fig.height = 5}
+# plot probability of epidemic across introductions for multiple k
+ggplot2::ggplot(data = epidemic_params) +
+  ggplot2::geom_point(
+    mapping = ggplot2::aes(
+      x = a, 
+      y = prob_epidemic,
+      colour = k
+    )
+  ) +
+  ggplot2::scale_y_continuous(
+    name = "Probability of large outbreak", 
+    limits = c(0, 1)
+  ) +
+  ggplot2::labs(colour = "Dispersion (*k*)") +
+  ggplot2::scale_x_continuous(name = "Number of introductions", n.breaks = 6) +
+  ggplot2::scale_colour_continuous(type = "viridis") +
+  ggplot2::theme_bw() +
+  ggplot2::theme(legend.title = ggtext::element_markdown())
+```
+
+In the above plot we drop the uncertainty around each point and assume a known value
+of R in order to more clearly show the pattern.
+
+Applications that make it easy to explore this functionality and compare between existing
+parameter estimates of offspring distribution are also useful when
+knowledge of superspreading in the early stages of an outbreak. An example of this is the
+[Shiny app developed by the Centre for Mathematical Modelling of Infectious diseases](https://cmmid.github.io/visualisations/new-outbreak-probability) at the London
+School of Hygiene and Tropical Medicine, for comparing SARS-like and MERS-like scenarios, 
+as well as random mixing.
+
+Conversely to the probability of an epidemics, the probability that an outbreak will 
+go extinct (i.e. transmission will subside), can also be plotted for different values of 
+$R$.
+
+```{r, prep-exinction-plot}
+extinction_params <- expand.grid(
+  R = seq(0, 5, 0.1), 
+  k = c(0.01, 0.1, 0.5, 1, 4, Inf),
+  a = 1
+)
+# results are transposed to pivot to long rather than wide data
+prob_extinct <- apply(extinction_params, 1, function(x) {
+  median <- probability_extinct(R = x[["R"]], k = x[["k"]], a = x[["a"]])
+  median
+})
+extinction_params <- cbind(extinction_params, prob_extinct)
+```
+
+```{r, plot-extinction, class.source = 'fold-hide', fig.cap="The probability that an infectious disease will go extinct for a given value of $R$ and $k$. This is calculated using `probability_extinct()` function.", fig.width = 8, fig.height = 5}
+# plot probability of extinction across R for multiple k
+ggplot2::ggplot(data = extinction_params) +
+  ggplot2::geom_point(mapping = ggplot2::aes(x = R, y = prob_extinct, colour = factor(k))) +
+    ggplot2::scale_y_continuous(
+    name = "Probability of extinction", 
+    limits = c(0, 1)
+  ) +
+  ggplot2::labs(colour = "Dispersion (*k*)") +
+  ggplot2::scale_x_continuous(name = "Reproductive number (*R*)", n.breaks = 6) +
+  ggplot2::scale_color_viridis_d() +
+  ggplot2::theme_bw() +
+  ggplot2::theme(
+    axis.title.x = ggtext::element_markdown(),
+    legend.title = ggtext::element_markdown()
+  )
+```
+
+## Superspreading in decision making
+
+One of the first tasks in an outbreak is to establish whether estimates of individual-level
+transmission variability have been reported, either for this outbreak or a previous outbreak
+of the same pathogen. Alternatively, if it is an outbreak of a novel pathogen, parameters
+for similar pathogens can be searched for. 
+
+This is accomplished by another Epiverse-TRACE R package: [{epiparameter}](https://epiverse-trace.github.io/epiparameter/index.html). This package
+contains a library of epidemiological parameters included offspring distribution parameter estimates and uncertainty with the corresponding source (e.g. published article). This aids the collation and establish a current understanding as was done for [COVID-19](https://www.gov.uk/government/publications/analysis-of-sars-cov-2-transmission-clusters-and-superspreading-events-3-june-2020).
+
+From these parameter estimates metrics to give an intuitive understanding of transmission patterns can be tabulated.
+For example the proportion of transmission events in a given cluster size can be calculated using the {superspreading} function `proportion_cluster_size()`.
+
+```{r}
+# For R = 0.8
+proportion_cluster_size(R = 0.8, k = seq(0.1, 1, 0.1), cluster_size = c(5, 10, 25))
+
+# For R = 3
+proportion_cluster_size(R = 0.8, k = seq(0.1, 1, 0.1), cluster_size = c(5, 10, 25))
+```
 
-## Link to policy report
+These results indicate whether preventing gatherings of a certain size can reduce the epidemic
+by preventing potential superspreading events.
 
+When data on the settings of transmission is known, priority can be given to restricting 
+particular types of gatherings to most effectively bring down the reproduction number.
 
 

From 71c84e4fe93be85fcc0fe4e41423a474d0ab8979 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 7 Jun 2023 14:13:56 +0100
Subject: [PATCH 03/20] added ggtext to Suggests

---
 DESCRIPTION | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b67a58a..ab2e8df 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -43,8 +43,9 @@ Suggests:
     bookdown,
     rmarkdown,
     ggplot2,
-    testthat (>= 3.0.0),
-    spelling
+    spelling,
+    ggtext,
+    testthat (>= 3.0.0)
 Remotes:
     epiverse-trace/epiparameter
 Config/testthat/edition: 3

From d7b6db7dee25d7134d571d66e5ae0c549432be1c Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 7 Jun 2023 18:22:02 +0100
Subject: [PATCH 04/20] added initial version of probability_contain()
 function, WIP #13

---
 NAMESPACE                  |  1 +
 R/probability_contain.R    | 51 ++++++++++++++++++++++++++++++++++++
 man/probability_contain.Rd | 53 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+)
 create mode 100644 R/probability_contain.R
 create mode 100644 man/probability_contain.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 8ea8f5c..3cb4b60 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,6 +4,7 @@ export(dpoislnorm)
 export(dpoisweibull)
 export(ppoislnorm)
 export(ppoisweibull)
+export(probability_contain)
 export(probability_epidemic)
 export(probability_extinct)
 export(proportion_cluster_size)
diff --git a/R/probability_contain.R b/R/probability_contain.R
new file mode 100644
index 0000000..0a519a6
--- /dev/null
+++ b/R/probability_contain.R
@@ -0,0 +1,51 @@
+#' Probability that an outbreak will be contained
+#'
+#' @description
+#' Containment is defined as not reaching 100 cases
+#'
+#'
+#' @inheritParams probability_epidemic
+#' @param c Control strength, 0 is no control measures, 1 is complete control.
+#' @param control_type Either `"population"` or `"individual"` for population-level
+#' or individual-level control measures.
+#' @param stochastic Whether to use a stochastic branching process model or the
+#' probability of extinction.
+#' @param ... arguments to be passed to [bpmodels::chain_sim()].
+#' @param case_threshold A number for the threshold of the number of cases below
+#' which the epidemic is considered contained.
+#'
+#' @return A number for the probability of containment
+#' @export
+#'
+#' @references
+#'
+#' Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005)
+#' Superspreading and the effect of individual variation on disease emergence.
+#' Nature, 438(7066), 355-359. <https://doi.org/10.1038/nature04153>
+#'
+#' @examples
+#' probability_contain(R = 1.5, k = 0.5, c = 1)
+probability_contain <- function(R, k, a = 1, c,
+                                control_type = c("population", "individual"),
+                                stochastic = TRUE,
+                                ...,
+                                case_threshold = 100) {
+  control_type <- match.arg(control_type)
+  if (control_type == "population") {
+    R <- (1 - c) * R
+  } else {
+    stop("individual-level controls not yet implemented")
+  }
+
+  if (a != 1) {
+    stop("Multiple introductions is not yet implemented for probability_contain")
+  }
+
+  if (stochastic) {
+    chain_size <- bpmodels::chain_sim(n = 1e5, offspring = "nbinom", size = k, mu = R, infinite = case_threshold, ...)
+    prob_contain <- sum(!is.infinite(chain_size)) / length(chain_size)
+  } else {
+    prob_contain <- probability_extinct(R = R, k = k, a = a)
+  }
+  return(prob_contain)
+}
diff --git a/man/probability_contain.Rd b/man/probability_contain.Rd
new file mode 100644
index 0000000..9bdc09e
--- /dev/null
+++ b/man/probability_contain.Rd
@@ -0,0 +1,53 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/probability_contain.R
+\name{probability_contain}
+\alias{probability_contain}
+\title{Probability that an outbreak will be contained}
+\usage{
+probability_contain(
+  R,
+  k,
+  a = 1,
+  c,
+  control_type = c("population", "individual"),
+  stochastic = TRUE,
+  ...,
+  case_threshold = 100
+)
+}
+\arguments{
+\item{R}{A \code{number} specifying the R parameter (i.e. average secondary cases
+per infectious individual)}
+
+\item{k}{A \code{number} specifying the  k parameter (i.e. overdispersion in
+offspring distribution from fitted negative binomial)}
+
+\item{a}{A \code{count} specifying the number of initial infections}
+
+\item{c}{Control strength, 0 is no control measures, 1 is complete control.}
+
+\item{control_type}{Either \code{"population"} or \code{"individual"} for population-level
+or individual-level control measures.}
+
+\item{stochastic}{Whether to use a stochastic branching process model or the
+probability of extinction.}
+
+\item{...}{arguments to be passed to \code{\link[bpmodels:chain_sim]{bpmodels::chain_sim()}}.}
+
+\item{case_threshold}{A number for the threshold of the number of cases below
+which the epidemic is considered contained.}
+}
+\value{
+A number for the probability of containment
+}
+\description{
+Containment is defined as not reaching 100 cases
+}
+\examples{
+probability_contain(R = 1.5, k = 0.5, c = 1)
+}
+\references{
+Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005)
+Superspreading and the effect of individual variation on disease emergence.
+Nature, 438(7066), 355-359. \url{https://doi.org/10.1038/nature04153}
+}

From c7c1179ad62b7843188781f0aeab683d9275a745 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Wed, 7 Jun 2023 18:22:23 +0100
Subject: [PATCH 05/20] added bpmodels to Imports and Remotes in DESCRIPTION

---
 DESCRIPTION | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ab2e8df..0355e63 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -34,7 +34,8 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
 Imports:
     checkmate,
-    stats
+    stats,
+    bpmodels
 Suggests: 
     epiparameter,
     distributional,
@@ -48,6 +49,7 @@ Suggests:
     testthat (>= 3.0.0)
 Remotes:
     epiverse-trace/epiparameter
+    epiverse-trace/bpmodels
 Config/testthat/edition: 3
 Config/Needs/website:
     epiverse-trace/epiversetheme

From 8befbfd9eb0dd89aaad66e40bedc4f1255877332 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 8 Jun 2023 15:16:13 +0100
Subject: [PATCH 06/20] fixed missing comma in DESCRIPTION

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0355e63..97a08b0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -48,7 +48,7 @@ Suggests:
     ggtext,
     testthat (>= 3.0.0)
 Remotes:
-    epiverse-trace/epiparameter
+    epiverse-trace/epiparameter,
     epiverse-trace/bpmodels
 Config/testthat/edition: 3
 Config/Needs/website:

From c20b22c625d52b4c1168c0b589d222ad694aba77 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 8 Jun 2023 15:19:44 +0100
Subject: [PATCH 07/20] added containment plot to epidemic_risk, WIP #12

---
 vignettes/epidemic_risk.Rmd | 45 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index fecfef4..99f520e 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -170,7 +170,7 @@ prob_extinct <- apply(extinction_params, 1, function(x) {
 extinction_params <- cbind(extinction_params, prob_extinct)
 ```
 
-```{r, plot-extinction, class.source = 'fold-hide', fig.cap="The probability that an infectious disease will go extinct for a given value of $R$ and $k$. This is calculated using `probability_extinct()` function.", fig.width = 8, fig.height = 5}
+```{r, plot-extinction, class.source = 'fold-hide', fig.cap="The probability that an infectious disease will go extinct for a given value of $R$ and $k$. This is calculated using `probability_extinct()` function. This plot is reproduced from @lloyd-smithSuperspreadingEffectIndividual2005 figure 2B.", fig.width = 8, fig.height = 5}
 # plot probability of extinction across R for multiple k
 ggplot2::ggplot(data = extinction_params) +
   ggplot2::geom_point(mapping = ggplot2::aes(x = R, y = prob_extinct, colour = factor(k))) +
@@ -215,4 +215,47 @@ by preventing potential superspreading events.
 When data on the settings of transmission is known, priority can be given to restricting 
 particular types of gatherings to most effectively bring down the reproduction number.
 
+## Controls on transmission
 
+In their work on identifying the prevalence of overdispersion in individual-level transmission, @lloyd-smithSuperspreadingEffectIndividual2005 also showed the effect of control measures on the probability of containment. They defined containment as the size of a transmission chain not reaching 100 cases. They assumed control measures could be either population-level, or individual-level. We `probability_contain()` to calculate the probability that disease spread will go extinct before reaching a threshold size.
+
+```{r, prep-containment-plot}
+contain_params <- expand.grid(
+  R = 3, k = c(0.1, 0.5, 1, Inf), a = 1, c = seq(0, 1, 0.05)
+)
+prob_contain <- apply(contain_params, 1, function(x) {
+  probability_contain(
+    R = x[["R"]], 
+    k = x[["k"]], 
+    a = x[["a"]], 
+    c = x[["c"]], 
+    control_type = "population", 
+    stochastic = TRUE, 
+    case_threshold = 100
+  )
+})
+contain_params <- cbind(contain_params, prob_contain)
+```
+
+```{r, plot-containment, class.source = 'fold-hide', fig.cap="The probability that an outbreak will be contained (i.e. not exceed 100 chains) for a variety of population-level control measures. The probability of containment is calculated using `probability_contain()`. This plot is reproduced from Lloyd-Smith et al. (2005) figure 3C.", fig.width = 8, fig.height = 5}
+# plot probability of epidemic across introductions for multiple k
+ggplot2::ggplot(data = contain_params) +
+  ggplot2::geom_point(
+    mapping = ggplot2::aes(
+      x = c, 
+      y = prob_contain,
+      colour = factor(k)
+    )
+  ) + 
+  ggplot2::scale_y_continuous(
+    name = "Probability of containment", 
+    limits = c(0, 1)
+  ) +
+  ggplot2::scale_x_continuous(name = "Control measures (*c*)", n.breaks = 6) +
+  ggplot2::labs(colour = "Dispersion (*k*)") +
+  ggplot2::scale_color_viridis_d() +
+  ggplot2::theme_bw() +
+  ggplot2::theme(axis.title.x = ggtext::element_markdown(),
+                 legend.title = ggtext::element_markdown())
+
+```

From 2aef6551fcde8d935d76c829e36aa45cdfcd54f4 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 8 Jun 2023 15:42:10 +0100
Subject: [PATCH 08/20] added tests for probability_contain

---
 tests/testthat/test-probability_contain.R | 28 +++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 tests/testthat/test-probability_contain.R

diff --git a/tests/testthat/test-probability_contain.R b/tests/testthat/test-probability_contain.R
new file mode 100644
index 0000000..04372da
--- /dev/null
+++ b/tests/testthat/test-probability_contain.R
@@ -0,0 +1,28 @@
+test_that("probability_contain works as expected", {
+  prob_contain <- probability_contain(R = 1.5, k = 0.5, a = 1, c = 0)
+  # larger tolerance for stochastic variance
+  expect_equal(prob_contain, 0.7672, tolerance = 1e-2)
+})
+
+test_that("probability_contain works as expected for deterministic", {
+  prob_contain <- probability_contain(R = 1.5, k = 0.5, a = 1, c = 0, stochastic = FALSE)
+  expect_equal(prob_contain, 0.768)
+})
+
+test_that("probability_contain works as expected for different case threshold", {
+  prob_contain <- probability_contain(R = 1.5, k = 0.5, a = 1, c = 0, case_threshold = 50)
+  # larger tolerance for stochastic variance
+  expect_equal(prob_contain, 0.76255, tolerance = 1e-2)
+})
+
+test_that("probability_contain fails as expected", {
+  expect_error(
+    probability_contain(R = 1, k = 1, a = 2, c = 1),
+    regexp = "(Multiple introductions is not yet implemented)"
+  )
+
+  expect_error(
+    probability_contain(R = 1, k = 1, a = 2, c = 1, control_type = "individual"),
+    regexp = "individual-level controls not yet implemented"
+  )
+})

From bc5d45ee3fecff559d76c456111a19c85bdc9646 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 8 Jun 2023 15:47:51 +0100
Subject: [PATCH 09/20] linting

---
 R/probability_contain.R                   | 21 ++++--
 tests/testthat/test-probability_contain.R | 14 +++-
 vignettes/epidemic_risk.Rmd               | 90 ++++++++++++++---------
 3 files changed, 82 insertions(+), 43 deletions(-)

diff --git a/R/probability_contain.R b/R/probability_contain.R
index 0a519a6..6cf1bc1 100644
--- a/R/probability_contain.R
+++ b/R/probability_contain.R
@@ -6,8 +6,8 @@
 #'
 #' @inheritParams probability_epidemic
 #' @param c Control strength, 0 is no control measures, 1 is complete control.
-#' @param control_type Either `"population"` or `"individual"` for population-level
-#' or individual-level control measures.
+#' @param control_type Either `"population"` or `"individual"` for
+#' population-level or individual-level control measures.
 #' @param stochastic Whether to use a stochastic branching process model or the
 #' probability of extinction.
 #' @param ... arguments to be passed to [bpmodels::chain_sim()].
@@ -25,24 +25,33 @@
 #'
 #' @examples
 #' probability_contain(R = 1.5, k = 0.5, c = 1)
-probability_contain <- function(R, k, a = 1, c,
+probability_contain <- function(R, k, a = 1, c, # nolint
                                 control_type = c("population", "individual"),
                                 stochastic = TRUE,
                                 ...,
                                 case_threshold = 100) {
   control_type <- match.arg(control_type)
   if (control_type == "population") {
-    R <- (1 - c) * R
+    R <- (1 - c) * R # nolint
   } else {
     stop("individual-level controls not yet implemented")
   }
 
   if (a != 1) {
-    stop("Multiple introductions is not yet implemented for probability_contain")
+    stop(
+      "Multiple introductions is not yet implemented for probability_contain"
+    )
   }
 
   if (stochastic) {
-    chain_size <- bpmodels::chain_sim(n = 1e5, offspring = "nbinom", size = k, mu = R, infinite = case_threshold, ...)
+    chain_size <- bpmodels::chain_sim(
+      n = 1e5,
+      offspring = "nbinom",
+      size = k,
+      mu = R,
+      infinite = case_threshold,
+      ...
+    )
     prob_contain <- sum(!is.infinite(chain_size)) / length(chain_size)
   } else {
     prob_contain <- probability_extinct(R = R, k = k, a = a)
diff --git a/tests/testthat/test-probability_contain.R b/tests/testthat/test-probability_contain.R
index 04372da..52b6ef5 100644
--- a/tests/testthat/test-probability_contain.R
+++ b/tests/testthat/test-probability_contain.R
@@ -5,12 +5,16 @@ test_that("probability_contain works as expected", {
 })
 
 test_that("probability_contain works as expected for deterministic", {
-  prob_contain <- probability_contain(R = 1.5, k = 0.5, a = 1, c = 0, stochastic = FALSE)
+  prob_contain <- probability_contain(
+    R = 1.5, k = 0.5, a = 1, c = 0, stochastic = FALSE
+  )
   expect_equal(prob_contain, 0.768)
 })
 
-test_that("probability_contain works as expected for different case threshold", {
-  prob_contain <- probability_contain(R = 1.5, k = 0.5, a = 1, c = 0, case_threshold = 50)
+test_that("probability_contain works as expected for different threshold", {
+  prob_contain <- probability_contain(
+    R = 1.5, k = 0.5, a = 1, c = 0, case_threshold = 50
+  )
   # larger tolerance for stochastic variance
   expect_equal(prob_contain, 0.76255, tolerance = 1e-2)
 })
@@ -22,7 +26,9 @@ test_that("probability_contain fails as expected", {
   )
 
   expect_error(
-    probability_contain(R = 1, k = 1, a = 2, c = 1, control_type = "individual"),
+    probability_contain(
+      R = 1, k = 1, a = 2, c = 1, control_type = "individual"
+    ),
     regexp = "individual-level controls not yet implemented"
   )
 })
diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index 99f520e..aef016a 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -56,7 +56,7 @@ prob_epidemic <- t(apply(epidemic_params, 1, function(x) {
   lower <- probability_epidemic(R = x[["R_lw"]], k = x[["k"]], a = x[["a"]])
   upper <- probability_epidemic(R = x[["R_up"]], k = x[["k"]], a = x[["a"]])
   c(prob_epidemic = median, prob_epidemic_lw = lower, prob_epidemic_up = upper)
-})) 
+}))
 epidemic_params <- cbind(epidemic_params, prob_epidemic)
 
 # subset data for a single initial infection
@@ -68,19 +68,25 @@ homogeneity <- subset(epidemic_params, a == 1)
 ggplot2::ggplot(data = homogeneity) +
   ggplot2::geom_ribbon(
     mapping = ggplot2::aes(
-      x = k, 
-      ymin = prob_epidemic_lw, 
+      x = k,
+      ymin = prob_epidemic_lw,
       ymax = prob_epidemic_up
-    ), 
+    ),
     fill = "grey70"
   ) +
   ggplot2::geom_line(mapping = ggplot2::aes(x = k, y = prob_epidemic)) +
-  ggplot2::geom_vline(mapping = ggplot2::aes(xintercept = 0.2), linetype = "dashed") +
-  ggplot2::annotate(geom = "text", x = 0.15, y = 0.75, label= "SARS") + 
-  ggplot2::geom_vline(mapping = ggplot2::aes(xintercept = 0.4), linetype = "dashed") +
+  ggplot2::geom_vline(
+    mapping = ggplot2::aes(xintercept = 0.2),
+    linetype = "dashed"
+  ) +
+  ggplot2::annotate(geom = "text", x = 0.15, y = 0.75, label = "SARS") +
+  ggplot2::geom_vline(
+    mapping = ggplot2::aes(xintercept = 0.4),
+    linetype = "dashed"
+  ) +
   ggplot2::annotate(geom = "text", x = 0.45, y = 0.75, label = "MERS") +
   ggplot2::scale_y_continuous(
-    name = "Probability of large outbreak", 
+    name = "Probability of large outbreak",
     limits = c(0, 1)
   ) +
   ggplot2::scale_x_continuous(name = "Extent of homogeneity in transmission") +
@@ -105,14 +111,14 @@ introductions <- subset(epidemic_params, k == 0.5)
 ggplot2::ggplot(data = introductions) +
   ggplot2::geom_pointrange(
     mapping = ggplot2::aes(
-      x = a, 
-      y = prob_epidemic, 
-      ymin = prob_epidemic_lw, 
+      x = a,
+      y = prob_epidemic,
+      ymin = prob_epidemic_lw,
       ymax = prob_epidemic_up
     )
   ) +
   ggplot2::scale_y_continuous(
-    name = "Probability of large outbreak", 
+    name = "Probability of large outbreak",
     limits = c(0, 1)
   ) +
   ggplot2::scale_x_continuous(name = "Number of introductions", n.breaks = 6) +
@@ -126,13 +132,13 @@ Different levels of heterogeneity in transmission will produce different probabi
 ggplot2::ggplot(data = epidemic_params) +
   ggplot2::geom_point(
     mapping = ggplot2::aes(
-      x = a, 
+      x = a,
       y = prob_epidemic,
       colour = k
     )
   ) +
   ggplot2::scale_y_continuous(
-    name = "Probability of large outbreak", 
+    name = "Probability of large outbreak",
     limits = c(0, 1)
   ) +
   ggplot2::labs(colour = "Dispersion (*k*)") +
@@ -158,7 +164,7 @@ $R$.
 
 ```{r, prep-exinction-plot}
 extinction_params <- expand.grid(
-  R = seq(0, 5, 0.1), 
+  R = seq(0, 5, 0.1),
   k = c(0.01, 0.1, 0.5, 1, 4, Inf),
   a = 1
 )
@@ -173,13 +179,22 @@ extinction_params <- cbind(extinction_params, prob_extinct)
 ```{r, plot-extinction, class.source = 'fold-hide', fig.cap="The probability that an infectious disease will go extinct for a given value of $R$ and $k$. This is calculated using `probability_extinct()` function. This plot is reproduced from @lloyd-smithSuperspreadingEffectIndividual2005 figure 2B.", fig.width = 8, fig.height = 5}
 # plot probability of extinction across R for multiple k
 ggplot2::ggplot(data = extinction_params) +
-  ggplot2::geom_point(mapping = ggplot2::aes(x = R, y = prob_extinct, colour = factor(k))) +
-    ggplot2::scale_y_continuous(
-    name = "Probability of extinction", 
+  ggplot2::geom_point(
+    mapping = ggplot2::aes(
+      x = R,
+      y = prob_extinct,
+      colour = factor(k)
+    )
+  ) +
+  ggplot2::scale_y_continuous(
+    name = "Probability of extinction",
     limits = c(0, 1)
   ) +
   ggplot2::labs(colour = "Dispersion (*k*)") +
-  ggplot2::scale_x_continuous(name = "Reproductive number (*R*)", n.breaks = 6) +
+  ggplot2::scale_x_continuous(
+    name = "Reproductive number (*R*)",
+    n.breaks = 6
+  ) +
   ggplot2::scale_color_viridis_d() +
   ggplot2::theme_bw() +
   ggplot2::theme(
@@ -203,10 +218,18 @@ For example the proportion of transmission events in a given cluster size can be
 
 ```{r}
 # For R = 0.8
-proportion_cluster_size(R = 0.8, k = seq(0.1, 1, 0.1), cluster_size = c(5, 10, 25))
+proportion_cluster_size(
+  R = 0.8,
+  k = seq(0.1, 1, 0.1),
+  cluster_size = c(5, 10, 25)
+)
 
 # For R = 3
-proportion_cluster_size(R = 0.8, k = seq(0.1, 1, 0.1), cluster_size = c(5, 10, 25))
+proportion_cluster_size(
+  R = 0.8,
+  k = seq(0.1, 1, 0.1),
+  cluster_size = c(5, 10, 25)
+)
 ```
 
 These results indicate whether preventing gatherings of a certain size can reduce the epidemic
@@ -225,12 +248,12 @@ contain_params <- expand.grid(
 )
 prob_contain <- apply(contain_params, 1, function(x) {
   probability_contain(
-    R = x[["R"]], 
-    k = x[["k"]], 
-    a = x[["a"]], 
-    c = x[["c"]], 
-    control_type = "population", 
-    stochastic = TRUE, 
+    R = x[["R"]],
+    k = x[["k"]],
+    a = x[["a"]],
+    c = x[["c"]],
+    control_type = "population",
+    stochastic = TRUE,
     case_threshold = 100
   )
 })
@@ -242,20 +265,21 @@ contain_params <- cbind(contain_params, prob_contain)
 ggplot2::ggplot(data = contain_params) +
   ggplot2::geom_point(
     mapping = ggplot2::aes(
-      x = c, 
+      x = c,
       y = prob_contain,
       colour = factor(k)
     )
-  ) + 
+  ) +
   ggplot2::scale_y_continuous(
-    name = "Probability of containment", 
+    name = "Probability of containment",
     limits = c(0, 1)
   ) +
   ggplot2::scale_x_continuous(name = "Control measures (*c*)", n.breaks = 6) +
   ggplot2::labs(colour = "Dispersion (*k*)") +
   ggplot2::scale_color_viridis_d() +
   ggplot2::theme_bw() +
-  ggplot2::theme(axis.title.x = ggtext::element_markdown(),
-                 legend.title = ggtext::element_markdown())
-
+  ggplot2::theme(
+    axis.title.x = ggtext::element_markdown(),
+    legend.title = ggtext::element_markdown()
+  )
 ```

From 0c4be283ceee202db22e00cf13656f4f75c28236 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 8 Jun 2023 15:51:37 +0100
Subject: [PATCH 10/20] use call. = FALSE in stop()

---
 R/probability_contain.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/probability_contain.R b/R/probability_contain.R
index 6cf1bc1..228299d 100644
--- a/R/probability_contain.R
+++ b/R/probability_contain.R
@@ -34,12 +34,13 @@ probability_contain <- function(R, k, a = 1, c, # nolint
   if (control_type == "population") {
     R <- (1 - c) * R # nolint
   } else {
-    stop("individual-level controls not yet implemented")
+    stop("individual-level controls not yet implemented", call. = FALSE)
   }
 
   if (a != 1) {
     stop(
-      "Multiple introductions is not yet implemented for probability_contain"
+      "Multiple introductions is not yet implemented for probability_contain",
+      call. = FALSE
     )
   }
 

From 2ce6fca52262082e75a99f39d20e9f6648d1e18e Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 8 Jun 2023 15:58:37 +0100
Subject: [PATCH 11/20] updated probability_contain documentation

---
 R/probability_contain.R    | 5 ++---
 man/probability_contain.Rd | 7 ++++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/probability_contain.R b/R/probability_contain.R
index 228299d..ba080a0 100644
--- a/R/probability_contain.R
+++ b/R/probability_contain.R
@@ -1,8 +1,7 @@
 #' Probability that an outbreak will be contained
 #'
-#' @description
-#' Containment is defined as not reaching 100 cases
-#'
+#' @description Containment is defined as the size of the transmission chain
+#' not reaching the `case_threshold` (default = 100).
 #'
 #' @inheritParams probability_epidemic
 #' @param c Control strength, 0 is no control measures, 1 is complete control.
diff --git a/man/probability_contain.Rd b/man/probability_contain.Rd
index 9bdc09e..f202ae2 100644
--- a/man/probability_contain.Rd
+++ b/man/probability_contain.Rd
@@ -26,8 +26,8 @@ offspring distribution from fitted negative binomial)}
 
 \item{c}{Control strength, 0 is no control measures, 1 is complete control.}
 
-\item{control_type}{Either \code{"population"} or \code{"individual"} for population-level
-or individual-level control measures.}
+\item{control_type}{Either \code{"population"} or \code{"individual"} for
+population-level or individual-level control measures.}
 
 \item{stochastic}{Whether to use a stochastic branching process model or the
 probability of extinction.}
@@ -41,7 +41,8 @@ which the epidemic is considered contained.}
 A number for the probability of containment
 }
 \description{
-Containment is defined as not reaching 100 cases
+Containment is defined as the size of the transmission chain
+not reaching the \code{case_threshold} (default = 100).
 }
 \examples{
 probability_contain(R = 1.5, k = 0.5, c = 1)

From 88e9d26ee33748136796e4cf8414a099aefb2f45 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 8 Jun 2023 15:58:56 +0100
Subject: [PATCH 12/20] added library(ggtext) and removed all explicit
 namespacing

---
 vignettes/epidemic_risk.Rmd | 101 ++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 50 deletions(-)

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index aef016a..381054b 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -36,6 +36,7 @@ This vignette explores the applications of the functions included the {superspre
 ```{r setup}
 library(superspreading)
 library(ggplot2)
+library(ggtext)
 ```
 
 Early in the COVID-19 epidemic @kucharskiEarlyDynamicsTransmission2020 investigated
@@ -65,32 +66,32 @@ homogeneity <- subset(epidemic_params, a == 1)
 
 ```{r, plot-dispersion, class.source = 'fold-hide', fig.cap="The probability that an initial infection (introduction) will cause a sustained outbreak (transmission chain). The dispersion of the individual-level transmission is plotted on the x-axis and probability of outbreak -- calculated using `probability_epidemic()` -- is on the y-axis. This plot is reproduced from Kucharski et al. (2020) figure 3A.", fig.width = 8, fig.height = 5}
 # plot probability of epidemic across dispersion
-ggplot2::ggplot(data = homogeneity) +
-  ggplot2::geom_ribbon(
-    mapping = ggplot2::aes(
+ggplot(data = homogeneity) +
+  geom_ribbon(
+    mapping = aes(
       x = k,
       ymin = prob_epidemic_lw,
       ymax = prob_epidemic_up
     ),
     fill = "grey70"
   ) +
-  ggplot2::geom_line(mapping = ggplot2::aes(x = k, y = prob_epidemic)) +
-  ggplot2::geom_vline(
-    mapping = ggplot2::aes(xintercept = 0.2),
+  geom_line(mapping = aes(x = k, y = prob_epidemic)) +
+  geom_vline(
+    mapping = aes(xintercept = 0.2),
     linetype = "dashed"
   ) +
-  ggplot2::annotate(geom = "text", x = 0.15, y = 0.75, label = "SARS") +
-  ggplot2::geom_vline(
-    mapping = ggplot2::aes(xintercept = 0.4),
+  annotate(geom = "text", x = 0.15, y = 0.75, label = "SARS") +
+  geom_vline(
+    mapping = aes(xintercept = 0.4),
     linetype = "dashed"
   ) +
-  ggplot2::annotate(geom = "text", x = 0.45, y = 0.75, label = "MERS") +
-  ggplot2::scale_y_continuous(
+  annotate(geom = "text", x = 0.45, y = 0.75, label = "MERS") +
+  scale_y_continuous(
     name = "Probability of large outbreak",
     limits = c(0, 1)
   ) +
-  ggplot2::scale_x_continuous(name = "Extent of homogeneity in transmission") +
-  ggplot2::theme_bw()
+  scale_x_continuous(name = "Extent of homogeneity in transmission") +
+  theme_bw()
 ```
 
 The degree of variability in transmission increases as $k$ decreases. So the probability
@@ -108,44 +109,44 @@ introductions <- subset(epidemic_params, k == 0.5)
 
 ```{r, plot-introductions, class.source = 'fold-hide', fig.cap="The probability that an a number of introduction events will cause a sustained outbreak (transmission chain). The number of disease introductions is plotted on the x-axis and probability of outbreak -- calculated using `probability_epidemic()` -- is on the y-axis. This plot is reproduced from Kucharski et al. (2020) figure 3B.", fig.width = 8, fig.height = 5}
 # plot probability of epidemic across introductions
-ggplot2::ggplot(data = introductions) +
-  ggplot2::geom_pointrange(
-    mapping = ggplot2::aes(
+ggplot(data = introductions) +
+  geom_pointrange(
+    mapping = aes(
       x = a,
       y = prob_epidemic,
       ymin = prob_epidemic_lw,
       ymax = prob_epidemic_up
     )
   ) +
-  ggplot2::scale_y_continuous(
+  scale_y_continuous(
     name = "Probability of large outbreak",
     limits = c(0, 1)
   ) +
-  ggplot2::scale_x_continuous(name = "Number of introductions", n.breaks = 6) +
-  ggplot2::theme_bw()
+  scale_x_continuous(name = "Number of introductions", n.breaks = 6) +
+  theme_bw()
 ```
 
 Different levels of heterogeneity in transmission will produce different probabilities of epidemics.
 
 ```{r, plot-introductions-multi-k, class.source = 'fold-hide', fig.cap="The probability that an a number of introduction events will cause a sustained outbreak (transmission chain). The number of disease introductions is plotted on the x-axis and probability of outbreak -- calculated using `probability_epidemic()` -- is on the y-axis. Different values of dispersion are plotted to show the effect of increased transmission variability on an epidemic establishing", fig.width = 8, fig.height = 5}
 # plot probability of epidemic across introductions for multiple k
-ggplot2::ggplot(data = epidemic_params) +
-  ggplot2::geom_point(
-    mapping = ggplot2::aes(
+ggplot(data = epidemic_params) +
+  geom_point(
+    mapping = aes(
       x = a,
       y = prob_epidemic,
       colour = k
     )
   ) +
-  ggplot2::scale_y_continuous(
+  scale_y_continuous(
     name = "Probability of large outbreak",
     limits = c(0, 1)
   ) +
-  ggplot2::labs(colour = "Dispersion (*k*)") +
-  ggplot2::scale_x_continuous(name = "Number of introductions", n.breaks = 6) +
-  ggplot2::scale_colour_continuous(type = "viridis") +
-  ggplot2::theme_bw() +
-  ggplot2::theme(legend.title = ggtext::element_markdown())
+  labs(colour = "Dispersion (*k*)") +
+  scale_x_continuous(name = "Number of introductions", n.breaks = 6) +
+  scale_colour_continuous(type = "viridis") +
+  theme_bw() +
+  theme(legend.title = element_markdown())
 ```
 
 In the above plot we drop the uncertainty around each point and assume a known value
@@ -178,28 +179,28 @@ extinction_params <- cbind(extinction_params, prob_extinct)
 
 ```{r, plot-extinction, class.source = 'fold-hide', fig.cap="The probability that an infectious disease will go extinct for a given value of $R$ and $k$. This is calculated using `probability_extinct()` function. This plot is reproduced from @lloyd-smithSuperspreadingEffectIndividual2005 figure 2B.", fig.width = 8, fig.height = 5}
 # plot probability of extinction across R for multiple k
-ggplot2::ggplot(data = extinction_params) +
-  ggplot2::geom_point(
-    mapping = ggplot2::aes(
+ggplot(data = extinction_params) +
+  geom_point(
+    mapping = aes(
       x = R,
       y = prob_extinct,
       colour = factor(k)
     )
   ) +
-  ggplot2::scale_y_continuous(
+  scale_y_continuous(
     name = "Probability of extinction",
     limits = c(0, 1)
   ) +
-  ggplot2::labs(colour = "Dispersion (*k*)") +
-  ggplot2::scale_x_continuous(
+  labs(colour = "Dispersion (*k*)") +
+  scale_x_continuous(
     name = "Reproductive number (*R*)",
     n.breaks = 6
   ) +
-  ggplot2::scale_color_viridis_d() +
-  ggplot2::theme_bw() +
-  ggplot2::theme(
-    axis.title.x = ggtext::element_markdown(),
-    legend.title = ggtext::element_markdown()
+  scale_color_viridis_d() +
+  theme_bw() +
+  theme(
+    axis.title.x = element_markdown(),
+    legend.title = element_markdown()
   )
 ```
 
@@ -262,24 +263,24 @@ contain_params <- cbind(contain_params, prob_contain)
 
 ```{r, plot-containment, class.source = 'fold-hide', fig.cap="The probability that an outbreak will be contained (i.e. not exceed 100 chains) for a variety of population-level control measures. The probability of containment is calculated using `probability_contain()`. This plot is reproduced from Lloyd-Smith et al. (2005) figure 3C.", fig.width = 8, fig.height = 5}
 # plot probability of epidemic across introductions for multiple k
-ggplot2::ggplot(data = contain_params) +
-  ggplot2::geom_point(
-    mapping = ggplot2::aes(
+ggplot(data = contain_params) +
+  geom_point(
+    mapping = aes(
       x = c,
       y = prob_contain,
       colour = factor(k)
     )
   ) +
-  ggplot2::scale_y_continuous(
+  scale_y_continuous(
     name = "Probability of containment",
     limits = c(0, 1)
   ) +
-  ggplot2::scale_x_continuous(name = "Control measures (*c*)", n.breaks = 6) +
-  ggplot2::labs(colour = "Dispersion (*k*)") +
-  ggplot2::scale_color_viridis_d() +
-  ggplot2::theme_bw() +
-  ggplot2::theme(
-    axis.title.x = ggtext::element_markdown(),
-    legend.title = ggtext::element_markdown()
+  scale_x_continuous(name = "Control measures (*c*)", n.breaks = 6) +
+  labs(colour = "Dispersion (*k*)") +
+  scale_color_viridis_d() +
+  theme_bw() +
+  theme(
+    axis.title.x = element_markdown(),
+    legend.title = element_markdown()
   )
 ```

From 12dbef2aeccee96bd6505a3f967d8678ca40a52d Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Thu, 8 Jun 2023 16:34:06 +0100
Subject: [PATCH 13/20] updates to epidemic_risk vignette, closes #12

---
 vignettes/epidemic_risk.Rmd | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index 381054b..1b5817b 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -1,5 +1,6 @@
 ---
 title: "Epidemic Risk"
+subtitle: "Superspreading on policy, decision making and control measures"
 output: 
   bookdown::html_vignette2:
     code_folding: show
@@ -29,7 +30,7 @@ under control (e.g. $R$ < 1) and increase the probability that an outbreak goes
 A recent example of where individual-level transmission and superspreading was analysed
 to understand disease transmission was by the [Scientific Advisory Group for Emergencies 
 (SAGE)](https://www.gov.uk/government/organisations/scientific-advisory-group-for-emergencies) 
-for the [UK's response COVID-19](https://www.gov.uk/government/publications/analysis-of-sars-cov-2-transmission-clusters-and-superspreading-events-3-june-2020). 
+for the [UK's COVID-19 response](https://www.gov.uk/government/publications/analysis-of-sars-cov-2-transmission-clusters-and-superspreading-events-3-june-2020). 
 
 This vignette explores the applications of the functions included the {superspreading} package and their visualiation in understanding and informing decision makers for a variety of outbreak scenarios.
 
@@ -43,7 +44,7 @@ Early in the COVID-19 epidemic @kucharskiEarlyDynamicsTransmission2020 investiga
 whether data on SARS-CoV-2 transmission displayed signatures of overdispersion (i.e.
 evidence of superspreading events). This was in light of evidence that other corona virus
 outbreaks (Severe acute respiratory syndrome (SARS) and Middle East respiratory syndrome (MERS))
-had experienced superspreading events. The detection of variability in individual-transmission
+had superspreading events. The detection of variability in individual-transmission
 is important in understanding or predicting the likelihood of transmission establishing
 when imported into a new location (e.g. country). 
 
@@ -95,7 +96,7 @@ ggplot(data = homogeneity) +
 ```
 
 The degree of variability in transmission increases as $k$ decreases. So the probability
-of a large outbreak is smaller for smaller values of $k$, meaning that if SARS-CoV-2
+of a large outbreak is smaller for smaller values of $k$, meaning that if COVID-19
 is more similar to SARS than MERS it will be less likely to establish and cause an 
 outbreak if introduced into a newly susceptible population.
 
@@ -150,11 +151,10 @@ ggplot(data = epidemic_params) +
 ```
 
 In the above plot we drop the uncertainty around each point and assume a known value
-of R in order to more clearly show the pattern.
+of $R$ in order to more clearly show the pattern.
 
 Applications that make it easy to explore this functionality and compare between existing
-parameter estimates of offspring distribution are also useful when
-knowledge of superspreading in the early stages of an outbreak. An example of this is the
+parameter estimates of offspring distributions are also useful. An example of this is the
 [Shiny app developed by the Centre for Mathematical Modelling of Infectious diseases](https://cmmid.github.io/visualisations/new-outbreak-probability) at the London
 School of Hygiene and Tropical Medicine, for comparing SARS-like and MERS-like scenarios, 
 as well as random mixing.
@@ -212,9 +212,9 @@ of the same pathogen. Alternatively, if it is an outbreak of a novel pathogen, p
 for similar pathogens can be searched for. 
 
 This is accomplished by another Epiverse-TRACE R package: [{epiparameter}](https://epiverse-trace.github.io/epiparameter/index.html). This package
-contains a library of epidemiological parameters included offspring distribution parameter estimates and uncertainty with the corresponding source (e.g. published article). This aids the collation and establish a current understanding as was done for [COVID-19](https://www.gov.uk/government/publications/analysis-of-sars-cov-2-transmission-clusters-and-superspreading-events-3-june-2020).
+contains a library of epidemiological parameters included offspring distribution parameter estimates and uncertainty obtained from published literature. This aids the collation and establishes a baseline understanding of transmission. Similar to the collation of transmission clusters for [COVID-19](https://www.gov.uk/government/publications/analysis-of-sars-cov-2-transmission-clusters-and-superspreading-events-3-june-2020).
 
-From these parameter estimates metrics to give an intuitive understanding of transmission patterns can be tabulated.
+Equipped with these parameter estimates, metrics that provide an intuitive understanding of transmission patterns can be tabulated.
 For example the proportion of transmission events in a given cluster size can be calculated using the {superspreading} function `proportion_cluster_size()`.
 
 ```{r}
@@ -241,7 +241,7 @@ particular types of gatherings to most effectively bring down the reproduction n
 
 ## Controls on transmission
 
-In their work on identifying the prevalence of overdispersion in individual-level transmission, @lloyd-smithSuperspreadingEffectIndividual2005 also showed the effect of control measures on the probability of containment. They defined containment as the size of a transmission chain not reaching 100 cases. They assumed control measures could be either population-level, or individual-level. We `probability_contain()` to calculate the probability that disease spread will go extinct before reaching a threshold size.
+In their work on identifying the prevalence of overdispersion in individual-level transmission, @lloyd-smithSuperspreadingEffectIndividual2005 also showed the effect of control measures on the probability of containment. They defined containment as the size of a transmission chain not reaching 100 cases. They assumed control measures could be either population-level, or individual-level. {superspreading} provides `probability_contain()` to calculate the probability that disease spread will go extinct before reaching a threshold size.
 
 ```{r, prep-containment-plot}
 contain_params <- expand.grid(
@@ -261,7 +261,7 @@ prob_contain <- apply(contain_params, 1, function(x) {
 contain_params <- cbind(contain_params, prob_contain)
 ```
 
-```{r, plot-containment, class.source = 'fold-hide', fig.cap="The probability that an outbreak will be contained (i.e. not exceed 100 chains) for a variety of population-level control measures. The probability of containment is calculated using `probability_contain()`. This plot is reproduced from Lloyd-Smith et al. (2005) figure 3C.", fig.width = 8, fig.height = 5}
+```{r, plot-containment, class.source = 'fold-hide', fig.cap="The probability that an outbreak will be contained (i.e. not exceed 100 cases) for a variety of population-level control measures. The probability of containment is calculated using `probability_contain()`. This plot is reproduced from Lloyd-Smith et al. (2005) figure 3C.", fig.width = 8, fig.height = 5}
 # plot probability of epidemic across introductions for multiple k
 ggplot(data = contain_params) +
   geom_point(

From 51c176f74934986191b34983ef46ca16f17206d2 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 12 Jun 2023 11:28:38 +0100
Subject: [PATCH 14/20] add input checking to probability_contain

---
 R/probability_contain.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/R/probability_contain.R b/R/probability_contain.R
index ba080a0..46ea052 100644
--- a/R/probability_contain.R
+++ b/R/probability_contain.R
@@ -29,6 +29,14 @@ probability_contain <- function(R, k, a = 1, c, # nolint
                                 stochastic = TRUE,
                                 ...,
                                 case_threshold = 100) {
+  # check inputs
+  checkmate::assertNumber(R)
+  checkmate::assertNumber(k)
+  checkmate::assertCount(a)
+  checkmate::assertNumber(c, lower = 0, upper = 1)
+  checkmate::assertLogical(stochastic)
+  checkmate::assertNumber(case_threshold)
+
   control_type <- match.arg(control_type)
   if (control_type == "population") {
     R <- (1 - c) * R # nolint

From b81394bcd54403421f720eb482a3158602440767 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 12 Jun 2023 11:39:23 +0100
Subject: [PATCH 15/20] added box linking to get started vignette in
 epidemic_risk vignette

---
 vignettes/epidemic_risk.Rmd | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index 1b5817b..cfb42a9 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -21,6 +21,12 @@ knitr::opts_chunk$set(
 )
 ```
 
+::: {.alert .alert-info}
+**New to the {superspreading} package?**
+
+See the [Get started](https://epiverse-trace.github.io/superspreading/articles/superspreading.html) vignette for an introduction to the concepts discussed below and some of the functions included in the R package.
+:::
+
 During the early stages of an infectious disease outbreak information on transmission
 clusters can provide insight into the occurrence of superspreading events and from this
 the probability an outbreak will cause an epidemic. This has implications for policy

From 65c695c03e24cd0f2055dead08415310a07d2bab Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 12 Jun 2023 11:45:04 +0100
Subject: [PATCH 16/20] Incorporate comments on epidemic_risk vig text

Co-authored-by: James Azam <james.m.azam@gmail.com>
---
 vignettes/epidemic_risk.Rmd | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index cfb42a9..ca9f031 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -28,17 +28,17 @@ See the [Get started](https://epiverse-trace.github.io/superspreading/articles/s
 :::
 
 During the early stages of an infectious disease outbreak information on transmission
-clusters can provide insight into the occurrence of superspreading events and from this
+clusters and heterogeneity in individual-level transmission can provide insight into the occurrence of superspreading events and from this
 the probability an outbreak will cause an epidemic. This has implications for policy
-decisions whether control interventions are required to bring disease transmission 
+decisions on how to bring disease transmission 
 under control (e.g. $R$ < 1) and increase the probability that an outbreak goes extinct.
 
-A recent example of where individual-level transmission and superspreading was analysed
+A recent example of where individual-level transmission and superspreading were analysed
 to understand disease transmission was by the [Scientific Advisory Group for Emergencies 
 (SAGE)](https://www.gov.uk/government/organisations/scientific-advisory-group-for-emergencies) 
 for the [UK's COVID-19 response](https://www.gov.uk/government/publications/analysis-of-sars-cov-2-transmission-clusters-and-superspreading-events-3-june-2020). 
 
-This vignette explores the applications of the functions included the {superspreading} package and their visualiation in understanding and informing decision makers for a variety of outbreak scenarios.
+This vignette explores the applications of the functions included in the {superspreading} package and their visualization in understanding and informing decision-makers for a variety of outbreak scenarios.
 
 ```{r setup}
 library(superspreading)
@@ -46,9 +46,8 @@ library(ggplot2)
 library(ggtext)
 ```
 
-Early in the COVID-19 epidemic @kucharskiEarlyDynamicsTransmission2020 investigated
-whether data on SARS-CoV-2 transmission displayed signatures of overdispersion (i.e.
-evidence of superspreading events). This was in light of evidence that other corona virus
+Early in the COVID-19 epidemic, a number of studies including @kucharskiEarlyDynamicsTransmission2020 investigated
+whether data on SARS-CoV-2 incidence had evidence of superspreading events. These events are often estimated as some measure of overdispersion. This was in light of evidence that other corona virus
 outbreaks (Severe acute respiratory syndrome (SARS) and Middle East respiratory syndrome (MERS))
 had superspreading events. The detection of variability in individual-transmission
 is important in understanding or predicting the likelihood of transmission establishing
@@ -247,7 +246,7 @@ particular types of gatherings to most effectively bring down the reproduction n
 
 ## Controls on transmission
 
-In their work on identifying the prevalence of overdispersion in individual-level transmission, @lloyd-smithSuperspreadingEffectIndividual2005 also showed the effect of control measures on the probability of containment. They defined containment as the size of a transmission chain not reaching 100 cases. They assumed control measures could be either population-level, or individual-level. {superspreading} provides `probability_contain()` to calculate the probability that disease spread will go extinct before reaching a threshold size.
+In their work on identifying the prevalence of overdispersion in individual-level transmission, @lloyd-smithSuperspreadingEffectIndividual2005 also showed the effect of control measures on the probability of containment. They defined containment as the size of a transmission chain not reaching 100 cases. They assumed the implementation of control measures at the individual or population level. {superspreading} provides `probability_contain()` to calculate the probability that an outbreak will go extinct before reaching a threshold size.
 
 ```{r, prep-containment-plot}
 contain_params <- expand.grid(

From b28bfdccf63a24721a344964414f40206df567db Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 12 Jun 2023 11:55:24 +0100
Subject: [PATCH 17/20] added Lloyd-Smith ref to epidemic_risk variability
 statement

---
 vignettes/epidemic_risk.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index ca9f031..0296fb3 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -100,8 +100,8 @@ ggplot(data = homogeneity) +
   theme_bw()
 ```
 
-The degree of variability in transmission increases as $k$ decreases. So the probability
-of a large outbreak is smaller for smaller values of $k$, meaning that if COVID-19
+The degree of variability in transmission increases as $k$ decreases (@lloyd-smithSuperspreadingEffectIndividual2005). 
+So the probability of a large outbreak is smaller for smaller values of $k$, meaning that if COVID-19
 is more similar to SARS than MERS it will be less likely to establish and cause an 
 outbreak if introduced into a newly susceptible population.
 

From 5c05e2da632b14a68f48b56782872c7db915f76e Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 12 Jun 2023 11:56:36 +0100
Subject: [PATCH 18/20] Fix typo in epidemic_risk code chunk

Co-authored-by: James Azam <james.m.azam@gmail.com>
---
 vignettes/epidemic_risk.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index 0296fb3..c57f5f3 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -232,7 +232,7 @@ proportion_cluster_size(
 
 # For R = 3
 proportion_cluster_size(
-  R = 0.8,
+  R = 3,
   k = seq(0.1, 1, 0.1),
   cluster_size = c(5, 10, 25)
 )

From 4b53b8d476c063198479ba167f2d9a7f5c440463 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 12 Jun 2023 14:56:58 +0100
Subject: [PATCH 19/20] fixed spelling

---
 inst/WORDLIST               | 13 +++++++++++++
 vignettes/epidemic_risk.Rmd |  6 +++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/inst/WORDLIST b/inst/WORDLIST
index b1cec70..a6cdbf4 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -1,6 +1,8 @@
 Althaus
+aes
 Barré
 Boëlle
+bw
 Camacho
 CMD
 COVID
@@ -18,16 +20,27 @@ Getz
 Heleze
 Kopp
 Lifecycle
+linetype
 Liu
 Loucoubar
+lw
 Magassouba
+MERS
 N’Faly
 org
 Oumar
 Ousmane
+params
+pointrange
+prob
 Schreiber
 Soropogui
+viridis
+vline
 Wellcome
+xintercept
+ymax
+ymin
 al
 doi
 epiparameter
diff --git a/vignettes/epidemic_risk.Rmd b/vignettes/epidemic_risk.Rmd
index c57f5f3..7093bdd 100644
--- a/vignettes/epidemic_risk.Rmd
+++ b/vignettes/epidemic_risk.Rmd
@@ -38,7 +38,7 @@ to understand disease transmission was by the [Scientific Advisory Group for Eme
 (SAGE)](https://www.gov.uk/government/organisations/scientific-advisory-group-for-emergencies) 
 for the [UK's COVID-19 response](https://www.gov.uk/government/publications/analysis-of-sars-cov-2-transmission-clusters-and-superspreading-events-3-june-2020). 
 
-This vignette explores the applications of the functions included in the {superspreading} package and their visualization in understanding and informing decision-makers for a variety of outbreak scenarios.
+This vignette explores the applications of the functions included in the {superspreading} package and their visualisation in understanding and informing decision-makers for a variety of outbreak scenarios.
 
 ```{r setup}
 library(superspreading)
@@ -201,7 +201,7 @@ ggplot(data = extinction_params) +
     name = "Reproductive number (*R*)",
     n.breaks = 6
   ) +
-  scale_color_viridis_d() +
+  scale_colour_viridis_d() +
   theme_bw() +
   theme(
     axis.title.x = element_markdown(),
@@ -282,7 +282,7 @@ ggplot(data = contain_params) +
   ) +
   scale_x_continuous(name = "Control measures (*c*)", n.breaks = 6) +
   labs(colour = "Dispersion (*k*)") +
-  scale_color_viridis_d() +
+  scale_colour_viridis_d() +
   theme_bw() +
   theme(
     axis.title.x = element_markdown(),

From 4864c8ce710a0a54bfe4852ebfdd38ca6a5ab840 Mon Sep 17 00:00:00 2001
From: Joshua Lambert <joshua.lambert@lshtm.ac.uk>
Date: Mon, 12 Jun 2023 14:57:09 +0100
Subject: [PATCH 20/20] updated pkgdown reference

---
 _pkgdown.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index 90883bd..d01ab39 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -17,8 +17,8 @@ reference:
     contents:
     - ends_with("poisweibull")
 
-  - title: Probability of epidemic or extinction
-    desc: Probability of a disease causing an epidemic or going extinct
+  - title: Probability of epidemic, extinction or containment
+    desc: Probability of a disease causing an epidemic, going extinct or being contained
     contents:
     - starts_with("probability")
 
@@ -32,3 +32,7 @@ articles:
   navbar: Parameter estimation
   contents:
   - estimate_individual_level_transmission
+- title: Superspreading for decision-making
+  navbar: Superspreading for decision-making
+  contents:
+  - epidemic_risk