From 18336c70dd30720cdc1b12d0c2d8083ebf10a346 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Mon, 4 Sep 2023 17:54:33 +0100 Subject: [PATCH 01/57] Delete duplicated return value in function doc --- R/likelihood.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/likelihood.R b/R/likelihood.R index a9f566f0..07b2aec4 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -13,7 +13,6 @@ #' contributions will be returned rather than the sum. #' @param ... Parameters for the offspring distribution. #' @return -#' * A log-likelihood, if \code{log = TRUE} (the default) #' * A vector of log-likelihoods, if \code{log = TRUE} (the default) and #' \code{obs_prob < 1}, or #' * A list of individual log-likelihood contributions, if From f31ee9b67e97d0b10449a15affc502e065a61908 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:11:27 +0100 Subject: [PATCH 02/57] Replace "likelihood" with "log-likelihood" to clarify that the latter is being calculated --- R/stat_likelihoods.R | 17 +++++++++-------- man/gborel_size_ll.Rd | 4 ++-- man/geom_length_ll.Rd | 4 ++-- man/likelihood.Rd | 1 - man/nbinom_size_ll.Rd | 4 ++-- man/offspring_ll.Rd | 7 ++++--- man/pois_length_ll.Rd | 4 ++-- man/pois_size_ll.Rd | 4 ++-- 8 files changed, 23 insertions(+), 22 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index f54b7736..028fba92 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -1,4 +1,4 @@ -#' Likelihood of the size of chains with Poisson offspring distribution +#' Log-likelihood of the size of chains with Poisson offspring distribution #' #' @param x vector of sizes #' @param lambda rate of the Poisson distribution @@ -9,7 +9,7 @@ pois_size_ll <- function(x, lambda) { (x - 1) * log(lambda) - lambda * x + (x - 2) * log(x) - lgamma(x) } -#' Likelihood of the size of chains with Negative-Binomial offspring +#' Log-likelihood of the size of chains with Negative-Binomial offspring #' distribution #' #' @param x vector of sizes @@ -31,7 +31,7 @@ nbinom_size_ll <- function(x, size, prob, mu) { (size * x + (x - 1)) * log(1 + mu / size) } -#' Likelihood of the size of chains with gamma-Borel offspring distribution +#' Log-likelihood of the size of chains with gamma-Borel offspring distribution #' #' @param x vector of sizes #' @param size the dispersion parameter (often called \code{k} in ecological @@ -52,7 +52,7 @@ gborel_size_ll <- function(x, size, prob, mu) { (x - 1) * log(x) - (size + x - 1) * log(x + size / mu) } -#' Likelihood of the length of chains with Poisson offspring distribution +#' Log-likelihood of the length of chains with Poisson offspring distribution #' #' @param x vector of sizes #' @param lambda rate of the Poisson distribution @@ -70,7 +70,7 @@ pois_length_ll <- function(x, lambda) { log(Gk[x + 1] - Gk[x]) } -#' Likelihood of the length of chains with geometric offspring distribution +#' Log-likelihood of the length of chains with geometric offspring distribution #' #' @param x vector of sizes #' @param prob probability of the geometric distribution with mean @@ -86,10 +86,11 @@ geom_length_ll <- function(x, prob) { log(GkmGkm1) } -#' Likelihood of the length of chains with generic offspring distribution +#' Log-likelihood of the summary (size/length) of chains with generic offspring +#' distribution #' -#' The likelihoods are calculated with a crude approximation using simulated -#' chains by linearly approximating any missing values in the empirical +#' The log-likelihoods are calculated with a crude approximation using simulated +#' chain summaries by linearly approximating any missing values in the empirical #' cumulative distribution function (ecdf). #' @inheritParams likelihood #' @inheritParams simulate_vec diff --git a/man/gborel_size_ll.Rd b/man/gborel_size_ll.Rd index 618659f2..752aa56a 100644 --- a/man/gborel_size_ll.Rd +++ b/man/gborel_size_ll.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/stat_likelihoods.R \name{gborel_size_ll} \alias{gborel_size_ll} -\title{Likelihood of the size of chains with gamma-Borel offspring distribution} +\title{Log-likelihood of the size of chains with gamma-Borel offspring distribution} \usage{ gborel_size_ll(x, size, prob, mu) } @@ -21,7 +21,7 @@ applications)} log-likelihood values } \description{ -Likelihood of the size of chains with gamma-Borel offspring distribution +Log-likelihood of the size of chains with gamma-Borel offspring distribution } \author{ Sebastian Funk diff --git a/man/geom_length_ll.Rd b/man/geom_length_ll.Rd index f200df93..6c7dc6ad 100644 --- a/man/geom_length_ll.Rd +++ b/man/geom_length_ll.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/stat_likelihoods.R \name{geom_length_ll} \alias{geom_length_ll} -\title{Likelihood of the length of chains with geometric offspring distribution} +\title{Log-likelihood of the length of chains with geometric offspring distribution} \usage{ geom_length_ll(x, prob) } @@ -16,7 +16,7 @@ geom_length_ll(x, prob) log-likelihood values } \description{ -Likelihood of the length of chains with geometric offspring distribution +Log-likelihood of the length of chains with geometric offspring distribution } \author{ Sebastian Funk diff --git a/man/likelihood.Rd b/man/likelihood.Rd index 3bd9e76e..b5b77844 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -52,7 +52,6 @@ contributions will be returned rather than the sum.} } \value{ \itemize{ -\item A log-likelihood, if \code{log = TRUE} (the default) \item A vector of log-likelihoods, if \code{log = TRUE} (the default) and \code{obs_prob < 1}, or \item A list of individual log-likelihood contributions, if diff --git a/man/nbinom_size_ll.Rd b/man/nbinom_size_ll.Rd index 14003322..6bbd2475 100644 --- a/man/nbinom_size_ll.Rd +++ b/man/nbinom_size_ll.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/stat_likelihoods.R \name{nbinom_size_ll} \alias{nbinom_size_ll} -\title{Likelihood of the size of chains with Negative-Binomial offspring +\title{Log-likelihood of the size of chains with Negative-Binomial offspring distribution} \usage{ nbinom_size_ll(x, size, prob, mu) @@ -22,7 +22,7 @@ applications)} log-likelihood values } \description{ -Likelihood of the size of chains with Negative-Binomial offspring +Log-likelihood of the size of chains with Negative-Binomial offspring distribution } \author{ diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 65763d76..d0edde23 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/stat_likelihoods.R \name{offspring_ll} \alias{offspring_ll} -\title{Likelihood of the length of chains with generic offspring distribution} +\title{Log-likelihood of the summary (size/length) of chains with generic offspring +distribution} \usage{ offspring_ll( chains, @@ -36,11 +37,11 @@ to TRUE).} \item{...}{any parameters to pass to \code{\link{simulate_tree}}} } \value{ -If \code{log = TRUE} (the default), log-likelihood values, +log-likelihood values else raw likelihoods } \description{ -The likelihoods are calculated with a crude approximation using simulated +The log-likelihoods are calculated with a crude approximation using simulated chains by linearly approximating any missing values in the empirical cumulative distribution function (ecdf). } diff --git a/man/pois_length_ll.Rd b/man/pois_length_ll.Rd index 63f6088e..bf1f47ba 100644 --- a/man/pois_length_ll.Rd +++ b/man/pois_length_ll.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/stat_likelihoods.R \name{pois_length_ll} \alias{pois_length_ll} -\title{Likelihood of the length of chains with Poisson offspring distribution} +\title{Log-likelihood of the length of chains with Poisson offspring distribution} \usage{ pois_length_ll(x, lambda) } @@ -15,7 +15,7 @@ pois_length_ll(x, lambda) log-likelihood values } \description{ -Likelihood of the length of chains with Poisson offspring distribution +Log-likelihood of the length of chains with Poisson offspring distribution } \author{ Sebastian Funk diff --git a/man/pois_size_ll.Rd b/man/pois_size_ll.Rd index 00e662d0..5e0645f3 100644 --- a/man/pois_size_ll.Rd +++ b/man/pois_size_ll.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/stat_likelihoods.R \name{pois_size_ll} \alias{pois_size_ll} -\title{Likelihood of the size of chains with Poisson offspring distribution} +\title{Log-likelihood of the size of chains with Poisson offspring distribution} \usage{ pois_size_ll(x, lambda) } @@ -15,7 +15,7 @@ pois_size_ll(x, lambda) log-likelihood values } \description{ -Likelihood of the size of chains with Poisson offspring distribution +Log-likelihood of the size of chains with Poisson offspring distribution } \author{ Sebastian Funk From a5ec3d72128ce0b047eeebc065acaef245c72e50 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:13:11 +0100 Subject: [PATCH 03/57] Reword the docs of "offspring_ll" --- R/stat_likelihoods.R | 14 ++++++-------- man/offspring_ll.Rd | 20 ++++++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 028fba92..befa7c3e 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -93,15 +93,13 @@ geom_length_ll <- function(x, prob) { #' chain summaries by linearly approximating any missing values in the empirical #' cumulative distribution function (ecdf). #' @inheritParams likelihood -#' @inheritParams simulate_vec -#' @param chains Vector of sizes/lengths +#' @inheritParams simulate_summary +#' @param chains Vector of chain summaries (sizes/lengths) #' @param nsim_offspring Number of simulations of the offspring distribution -#' for approximating the statistic (size/length) distribution -#' @param log Logical; Should the results be log-transformed? (Defaults -#' to TRUE). -#' @param ... any parameters to pass to \code{\link{simulate_tree}} -#' @return If \code{log = TRUE} (the default), log-likelihood values, -#' else raw likelihoods +#' for approximating the distribution of the chain statistic summary +#' (size/length) +#' @param ... any parameters to pass to \code{\link{simulate_summary}} +#' @return log-likelihood values #' @author Sebastian Funk #' @export offspring_ll <- function(chains, offspring_dist, statistic, diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index d0edde23..58381ff2 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -15,7 +15,7 @@ offspring_ll( ) } \arguments{ -\item{chains}{Vector of sizes/lengths} +\item{chains}{Vector of chain summaries (sizes/lengths)} \item{offspring_dist}{Offspring distribution: a character string corresponding to the R distribution function (e.g., "pois" for Poisson, @@ -29,22 +29,26 @@ numbers).} }} \item{nsim_offspring}{Number of simulations of the offspring distribution -for approximating the statistic (size/length) distribution} +for approximating the distribution of the chain statistic summary +(size/length)} -\item{log}{Logical; Should the results be log-transformed? (Defaults -to TRUE).} - -\item{...}{any parameters to pass to \code{\link{simulate_tree}}} +\item{...}{any parameters to pass to \code{\link{simulate_summary}}} } \value{ log-likelihood values -else raw likelihoods } \description{ The log-likelihoods are calculated with a crude approximation using simulated -chains by linearly approximating any missing values in the empirical +chain summaries by linearly approximating any missing values in the empirical cumulative distribution function (ecdf). } +\examples{ +set.seed(123) +} +\seealso{ +\code{\link[=simulate_summary]{simulate_summary()}} for simulating a summary of the transmission +chains statistic (without the tree of infections) +} \author{ Sebastian Funk } From 202956dbf344981ec7a69767acaf205fe86e8fc7 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:14:01 +0100 Subject: [PATCH 04/57] Revert to returning log values to comform with function name --- R/stat_likelihoods.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index befa7c3e..81e88ee1 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -122,6 +122,6 @@ offspring_ll <- function(chains, offspring_dist, statistic, )$y)) lik <- acdf[chains] lik[is.na(lik)] <- 0 - out <- ifelse(base::isTRUE(log), log(lik), lik) + out <- log(lik) return(out) } From 74fb97309e98bccb4305bd5efaff20306d573811 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:14:26 +0100 Subject: [PATCH 05/57] Remove log argument --- R/stat_likelihoods.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 81e88ee1..f6e38e09 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -103,7 +103,7 @@ geom_length_ll <- function(x, prob) { #' @author Sebastian Funk #' @export offspring_ll <- function(chains, offspring_dist, statistic, - nsim_offspring = 100, log = TRUE, ...) { + nsim_offspring = 100, ...) { # Simulate the chains chains <- simulate_summary( nsim_offspring, offspring_dist, From 5936f3106e5ce57a18f6deb2a49ac501227b811a Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:15:02 +0100 Subject: [PATCH 06/57] Explicitly assign arguments to avoid positioning matching --- R/stat_likelihoods.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index f6e38e09..ad435a35 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -106,8 +106,10 @@ offspring_ll <- function(chains, offspring_dist, statistic, nsim_offspring = 100, ...) { # Simulate the chains chains <- simulate_summary( - nsim_offspring, offspring_dist, - statistic, ... + nchains = nsim_offspring, + offspring_dist = offspring_dist, + statistic = statistic, + ... ) # Compute the empirical Cumulative Distribution Function of the From 8998bf1b58aef561ba7778c3d122aa2003feef4b Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:16:36 +0100 Subject: [PATCH 07/57] Add a seealso tag and clean up examples --- R/stat_likelihoods.R | 10 ++++++++++ man/offspring_ll.Rd | 9 +-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index ad435a35..07bce3ab 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -102,6 +102,16 @@ geom_length_ll <- function(x, prob) { #' @return log-likelihood values #' @author Sebastian Funk #' @export +#' @seealso [simulate_summary()] for simulating a summary of the transmission +#' chains statistic (without the tree of infections) +#' @examples +#' set.seed(123) +# chain_size_ll <- offspring_ll( +# chains = c(1, 5, 6, 8, 7, 8, 10), +# offspring_dist = "pois", +# statistic = "size", +# lambda = 2 +# ) offspring_ll <- function(chains, offspring_dist, statistic, nsim_offspring = 100, ...) { # Simulate the chains diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 58381ff2..14b602f7 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -5,14 +5,7 @@ \title{Log-likelihood of the summary (size/length) of chains with generic offspring distribution} \usage{ -offspring_ll( - chains, - offspring_dist, - statistic, - nsim_offspring = 100, - log = TRUE, - ... -) +offspring_ll(chains, offspring_dist, statistic, nsim_offspring = 100, ...) } \arguments{ \item{chains}{Vector of chain summaries (sizes/lengths)} From c7cdf8fc2417538bf431bc8b7d13e4eef101ed05 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:12:04 +0100 Subject: [PATCH 08/57] Fixed the comment tags in the examples --- R/stat_likelihoods.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 07bce3ab..2fda56e4 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -106,12 +106,12 @@ geom_length_ll <- function(x, prob) { #' chains statistic (without the tree of infections) #' @examples #' set.seed(123) -# chain_size_ll <- offspring_ll( -# chains = c(1, 5, 6, 8, 7, 8, 10), -# offspring_dist = "pois", -# statistic = "size", -# lambda = 2 -# ) +#' chain_size_ll <- offspring_ll( +#' chains = c(1, 5, 6, 8, 7, 8, 10), +#' offspring_dist = "pois", +#' statistic = "size", +#' lambda = 2 +#' ) offspring_ll <- function(chains, offspring_dist, statistic, nsim_offspring = 100, ...) { # Simulate the chains From 85e98e9fdb8167ce8f0a1edfe72b06c8304bd359 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:32:26 +0100 Subject: [PATCH 09/57] Give nsim_obs a default value --- R/likelihood.R | 2 +- man/likelihood.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 07b2aec4..a50afc42 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -30,7 +30,7 @@ #' ) #' @export likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, - nsim_obs, log = TRUE, obs_prob = 1, stat_max = Inf, + nsim_obs = 100, log = TRUE, obs_prob = 1, stat_max = Inf, exclude = NULL, individual = FALSE, ...) { statistic <- match.arg(statistic) diff --git a/man/likelihood.Rd b/man/likelihood.Rd index b5b77844..57b64b5c 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -8,7 +8,7 @@ likelihood( chains, statistic = c("size", "length"), offspring_dist, - nsim_obs, + nsim_obs = 100, log = TRUE, obs_prob = 1, stat_max = Inf, From 56c3fd6d974f67c13142aca78a0bc6ab345d7d03 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:33:59 +0100 Subject: [PATCH 10/57] Assign lambda a value less than 1 to prevent example outbreak from overshooting --- R/stat_likelihoods.R | 2 +- man/offspring_ll.Rd | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 2fda56e4..94428885 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -110,7 +110,7 @@ geom_length_ll <- function(x, prob) { #' chains = c(1, 5, 6, 8, 7, 8, 10), #' offspring_dist = "pois", #' statistic = "size", -#' lambda = 2 +#' lambda = 0.82 #' ) offspring_ll <- function(chains, offspring_dist, statistic, nsim_offspring = 100, ...) { diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 14b602f7..4aa8d874 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -37,6 +37,12 @@ cumulative distribution function (ecdf). } \examples{ set.seed(123) +chain_size_ll <- offspring_ll( + chains = c(1, 5, 6, 8, 7, 8, 10), + offspring_dist = "pois", + statistic = "size", + lambda = 0.82 +) } \seealso{ \code{\link[=simulate_summary]{simulate_summary()}} for simulating a summary of the transmission From 9c7adf82a44a432fbce5128e3988e608cc2dc40e Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:35:38 +0100 Subject: [PATCH 11/57] Make documentation of likelihood function more consistent by using "log-likelihood" all through --- R/likelihood.R | 24 ++++++++++++------------ man/likelihood.Rd | 27 ++++++++++++++------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index a50afc42..287c96dd 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -1,25 +1,25 @@ -#' Estimate the (log) likelihood for observed branching processes +#' Estimate the log-likelihood/likelihood for observed branching processes #' #' @inheritParams simulate_summary -#' @param chains Vector of sizes/lengths of transmission chains. -#' @param nsim_obs Number of simulations if the likelihood is to be -#' approximated for imperfect observations. -#' @param log Logical; Should the results be log-transformed? (Defaults -#' to TRUE). +#' @inheritParams offspring_ll +#' @param nsim_obs Number of simulations if the log-likelihood/likelihood is to +#' be approximated for imperfect observations. +#' @param log Logical; Should the log-likelihoods be transformed to +#' likelihoods? (Defaults to TRUE). #' @param obs_prob Observation probability (assumed constant) #' @param exclude A vector of indices of the sizes/lengths to exclude from the -#' likelihood calculation. -#' @param individual If TRUE, a vector of individual (log)likelihood +#' log-likelihood calculation. +#' @param individual If TRUE, a vector of individual log-likelihood/likelihood #' contributions will be returned rather than the sum. -#' @param ... Parameters for the offspring distribution. #' @return #' * A vector of log-likelihoods, if \code{log = TRUE} (the default) and #' \code{obs_prob < 1}, or #' * A list of individual log-likelihood contributions, if #' \code{log = TRUE} (the default) and \code{individual = TRUE}. -#' else raw likelihoods, or vector of likelihoods -#' @seealso offspring_ll, pois_size_ll, nbinom_size_ll, gborel_size_ll, -#' pois_length_ll, geom_length_ll. +#' The interpretation follows for the other combinations of `log` and +#' `individual`. +#' @seealso offspring_ll(), pois_size_ll(), nbinom_size_ll(), gborel_size_ll(), +#' pois_length_ll(), geom_length_ll() #' @author Sebastian Funk #' @examples #' # example of observed chain sizes diff --git a/man/likelihood.Rd b/man/likelihood.Rd index 57b64b5c..c8d8ff2e 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/likelihood.R \name{likelihood} \alias{likelihood} -\title{Estimate the (log) likelihood for observed branching processes} +\title{Estimate the log-likelihood/likelihood for observed branching processes} \usage{ likelihood( chains, @@ -18,7 +18,7 @@ likelihood( ) } \arguments{ -\item{chains}{Vector of sizes/lengths of transmission chains.} +\item{chains}{Vector of chain summaries (sizes/lengths)} \item{statistic}{String; Statistic to calculate. Can be one of: \itemize{ @@ -31,11 +31,11 @@ corresponding to the R distribution function (e.g., "pois" for Poisson, where \code{\link{rpois}} is the R function to generate Poisson random numbers).} -\item{nsim_obs}{Number of simulations if the likelihood is to be -approximated for imperfect observations.} +\item{nsim_obs}{Number of simulations if the log-likelihood/likelihood is to +be approximated for imperfect observations.} -\item{log}{Logical; Should the results be log-transformed? (Defaults -to TRUE).} +\item{log}{Logical; Should the log-likelihoods be transformed to +likelihoods? (Defaults to TRUE).} \item{obs_prob}{Observation probability (assumed constant)} @@ -43,12 +43,12 @@ to TRUE).} computed. Results above the specified value, are set to \code{Inf}.} \item{exclude}{A vector of indices of the sizes/lengths to exclude from the -likelihood calculation.} +log-likelihood calculation.} -\item{individual}{If TRUE, a vector of individual (log)likelihood +\item{individual}{If TRUE, a vector of individual log-likelihood/likelihood contributions will be returned rather than the sum.} -\item{...}{Parameters for the offspring distribution.} +\item{...}{Parameters of the offspring distribution as required by R.} } \value{ \itemize{ @@ -56,11 +56,12 @@ contributions will be returned rather than the sum.} \code{obs_prob < 1}, or \item A list of individual log-likelihood contributions, if \code{log = TRUE} (the default) and \code{individual = TRUE}. -else raw likelihoods, or vector of likelihoods +The interpretation follows for the other combinations of \code{log} and +\code{individual}. } } \description{ -Estimate the (log) likelihood for observed branching processes +Estimate the log-likelihood/likelihood for observed branching processes } \examples{ # example of observed chain sizes @@ -71,8 +72,8 @@ likelihood( ) } \seealso{ -offspring_ll, pois_size_ll, nbinom_size_ll, gborel_size_ll, -pois_length_ll, geom_length_ll. +offspring_ll(), pois_size_ll(), nbinom_size_ll(), gborel_size_ll(), +pois_length_ll(), geom_length_ll() } \author{ Sebastian Funk From 2c81a58381dc726693978146044c9250b5cdc95e Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:36:00 +0100 Subject: [PATCH 12/57] Improve error message --- R/likelihood.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/likelihood.R b/R/likelihood.R index 287c96dd..47501a99 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -37,7 +37,9 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, ## checks check_offspring_valid(offspring_dist) - if (obs_prob <= 0 || obs_prob > 1) stop("'obs_prob' must be within (0,1]") + if (obs_prob <= 0 || obs_prob > 1) { + stop("'obs_prob' is a probability and must be between 0 and 1 inclusive") + } if (obs_prob < 1) { if (missing(nsim_obs)) { stop("'nsim_obs' must be specified if 'obs_prob' is < 1") From 879dc95bc94d80c1643dc4d1ca690e9c6600a746 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:36:20 +0100 Subject: [PATCH 13/57] Rename a variable --- R/likelihood.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 47501a99..7af5e924 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -45,10 +45,10 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, stop("'nsim_obs' must be specified if 'obs_prob' is < 1") } - sample_func <- get_statistic_func(statistic) + statistic_func <- get_statistic_func(statistic) sampled_x <- replicate(nsim_obs, pmin( - sample_func( + statistic_func( length(chains), chains, obs_prob ), From f1e62d4b469e66d5b85ab05138611d09665db5df Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:36:59 +0100 Subject: [PATCH 14/57] Replace "likelihood" with "log-likelihood" --- R/likelihood.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 7af5e924..63e56bef 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -64,19 +64,19 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, sampled_x <- list(chains) } - ## determine for which sizes to calculate the likelihood (for true chain size) + ## determine for which sizes to calculate the log-likelihood (for true chain size) if (any(size_x == stat_max)) { calc_sizes <- seq_len(stat_max - 1) } else { calc_sizes <- unique(c(size_x, exclude)) } - ## get likelihood function as given by offspring_dist and statistic + ## get log-likelihood function as given by offspring_dist and statistic likelihoods <- vector(mode = "numeric") ll_func <- construct_offspring_ll_name(offspring_dist, statistic) pars <- as.list(unlist(list(...))) ## converts vectors to lists - ## calculate likelihoods + ## calculate log-likelihoods if (exists(ll_func, where = asNamespace("epichains"), mode = "function")) { func <- get(ll_func) likelihoods[calc_sizes] <- do.call(func, c(list(x = calc_sizes), pars)) From 8505e33d30bab3578db3b59d43ab0c333b7674e9 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:37:24 +0100 Subject: [PATCH 15/57] Lint --- R/likelihood.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 63e56bef..5b07b1bc 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -55,9 +55,7 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, stat_max ), simplify = FALSE) size_x <- unlist(sampled_x) - if (!is.finite(stat_max)) { - stat_max <- max(size_x) + 1 - } + stat_max <- max(size_x) + 1 } else { chains[chains >= stat_max] <- stat_max size_x <- chains @@ -82,14 +80,18 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, likelihoods[calc_sizes] <- do.call(func, c(list(x = calc_sizes), pars)) } else { likelihoods[calc_sizes] <- - do.call( - offspring_ll, - c(list( - chains = calc_sizes, offspring_dist = offspring_dist, - statistic = statistic, stat_max = stat_max, - log = log - ), pars) + do.call( + offspring_ll, + c( + list( + chains = calc_sizes, + offspring_dist = offspring_dist, + statistic = statistic, + stat_max = stat_max + ), + pars ) + ) } ## assign probabilities to stat_max outbreak sizes From 54eeac17354924186533516bf04223a62c1907da Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:38:25 +0100 Subject: [PATCH 16/57] Add exp transformation for when log=FALSE --- R/likelihood.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/likelihood.R b/R/likelihood.R index 5b07b1bc..4a028f2b 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -113,6 +113,11 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, likelihoods[sx[!(sx %in% exclude)]] }) + ## transform log-likelihoods into likelihoods if required + if (!log) { + chains_likelihood <- lapply(chains_likelihood, function(ll) exp(ll)) + } + if (!individual) { chains_likelihood <- vapply(chains_likelihood, sum, 0) } From 206bc4063a87351bdb56dced62344dc1b3d2c18d Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:39:42 +0100 Subject: [PATCH 17/57] Add joint likelihood calculation for where individual=TRUE and depending on log=T/F --- R/likelihood.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/likelihood.R b/R/likelihood.R index 4a028f2b..be4b2928 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -118,8 +118,15 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, chains_likelihood <- lapply(chains_likelihood, function(ll) exp(ll)) } + ## if individual == FALSE, return the joint log-likelihood + ## (sum of the log-likelihoods), if log == TRUE, else + ## multiply the likelihoods if (!individual) { - chains_likelihood <- vapply(chains_likelihood, sum, 0) + if (log) { + chains_likelihood <- vapply(chains_likelihood, sum, 0) + } else{ + chains_likelihood <- vapply(chains_likelihood, prod, 0) + } } return(chains_likelihood) From 0e619522a11c33902a0e9562f2bfb3b7167d5952 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:11:27 +0100 Subject: [PATCH 18/57] Replace "likelihood" with "log-likelihood" to clarify that the latter is being calculated --- man/offspring_ll.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 4aa8d874..23d58eb2 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -29,10 +29,11 @@ for approximating the distribution of the chain statistic summary } \value{ log-likelihood values +else raw likelihoods } \description{ The log-likelihoods are calculated with a crude approximation using simulated -chain summaries by linearly approximating any missing values in the empirical +chains by linearly approximating any missing values in the empirical cumulative distribution function (ecdf). } \examples{ From c8a87732d82c8104442b7aae0f4077489c2b4c2e Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:13:11 +0100 Subject: [PATCH 19/57] Reword the docs of "offspring_ll" --- man/offspring_ll.Rd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 23d58eb2..4aa8d874 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -29,11 +29,10 @@ for approximating the distribution of the chain statistic summary } \value{ log-likelihood values -else raw likelihoods } \description{ The log-likelihoods are calculated with a crude approximation using simulated -chains by linearly approximating any missing values in the empirical +chain summaries by linearly approximating any missing values in the empirical cumulative distribution function (ecdf). } \examples{ From 6c31e441c8fbdb2b363fd5bc04a2a34f49cd054d Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 17:16:36 +0100 Subject: [PATCH 20/57] Add a seealso tag and clean up examples --- R/stat_likelihoods.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 94428885..07bce3ab 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -106,12 +106,12 @@ geom_length_ll <- function(x, prob) { #' chains statistic (without the tree of infections) #' @examples #' set.seed(123) -#' chain_size_ll <- offspring_ll( -#' chains = c(1, 5, 6, 8, 7, 8, 10), -#' offspring_dist = "pois", -#' statistic = "size", -#' lambda = 0.82 -#' ) +# chain_size_ll <- offspring_ll( +# chains = c(1, 5, 6, 8, 7, 8, 10), +# offspring_dist = "pois", +# statistic = "size", +# lambda = 2 +# ) offspring_ll <- function(chains, offspring_dist, statistic, nsim_offspring = 100, ...) { # Simulate the chains From d86a0192dee3158bb40ee311406390617170419a Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:12:04 +0100 Subject: [PATCH 21/57] Fixed the comment tags in the examples --- R/stat_likelihoods.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 07bce3ab..2fda56e4 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -106,12 +106,12 @@ geom_length_ll <- function(x, prob) { #' chains statistic (without the tree of infections) #' @examples #' set.seed(123) -# chain_size_ll <- offspring_ll( -# chains = c(1, 5, 6, 8, 7, 8, 10), -# offspring_dist = "pois", -# statistic = "size", -# lambda = 2 -# ) +#' chain_size_ll <- offspring_ll( +#' chains = c(1, 5, 6, 8, 7, 8, 10), +#' offspring_dist = "pois", +#' statistic = "size", +#' lambda = 2 +#' ) offspring_ll <- function(chains, offspring_dist, statistic, nsim_offspring = 100, ...) { # Simulate the chains From 263166fe9740c1ea842eb87b5c0a36210e0bc823 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:33:59 +0100 Subject: [PATCH 22/57] Assign lambda a value less than 1 to prevent example outbreak from overshooting --- R/stat_likelihoods.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 2fda56e4..94428885 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -110,7 +110,7 @@ geom_length_ll <- function(x, prob) { #' chains = c(1, 5, 6, 8, 7, 8, 10), #' offspring_dist = "pois", #' statistic = "size", -#' lambda = 2 +#' lambda = 0.82 #' ) offspring_ll <- function(chains, offspring_dist, statistic, nsim_offspring = 100, ...) { From 890ff99eca9fb9f408cc0b93662e8b3089605806 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:38:25 +0100 Subject: [PATCH 23/57] Add exp transformation for when log=FALSE --- R/likelihood.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index be4b2928..43481066 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -118,9 +118,6 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, chains_likelihood <- lapply(chains_likelihood, function(ll) exp(ll)) } - ## if individual == FALSE, return the joint log-likelihood - ## (sum of the log-likelihoods), if log == TRUE, else - ## multiply the likelihoods if (!individual) { if (log) { chains_likelihood <- vapply(chains_likelihood, sum, 0) From fdcd3fea7660ef7b81bf713f1f1fd46d31ec9c96 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:39:42 +0100 Subject: [PATCH 24/57] Add joint likelihood calculation for where individual=TRUE and depending on log=T/F --- R/likelihood.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/likelihood.R b/R/likelihood.R index 43481066..be4b2928 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -118,6 +118,9 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, chains_likelihood <- lapply(chains_likelihood, function(ll) exp(ll)) } + ## if individual == FALSE, return the joint log-likelihood + ## (sum of the log-likelihoods), if log == TRUE, else + ## multiply the likelihoods if (!individual) { if (log) { chains_likelihood <- vapply(chains_likelihood, sum, 0) From 2e4e02cf8271770cad03cd312f3ef342dd4d4195 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:48:24 +0100 Subject: [PATCH 25/57] Linting --- R/likelihood.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/likelihood.R b/R/likelihood.R index be4b2928..9694143a 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -39,7 +39,7 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, if (obs_prob <= 0 || obs_prob > 1) { stop("'obs_prob' is a probability and must be between 0 and 1 inclusive") - } + } if (obs_prob < 1) { if (missing(nsim_obs)) { stop("'nsim_obs' must be specified if 'obs_prob' is < 1") From e5b4503d403b67f0e8128c605e266c54b2ac01a0 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Wed, 6 Sep 2023 19:49:17 +0100 Subject: [PATCH 26/57] Linting --- R/likelihood.R | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 9694143a..d381cbe7 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -62,7 +62,8 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, sampled_x <- list(chains) } - ## determine for which sizes to calculate the log-likelihood (for true chain size) + ## determine for which sizes to calculate the log-likelihood + ## (for true chain size) if (any(size_x == stat_max)) { calc_sizes <- seq_len(stat_max - 1) } else { @@ -80,18 +81,18 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, likelihoods[calc_sizes] <- do.call(func, c(list(x = calc_sizes), pars)) } else { likelihoods[calc_sizes] <- - do.call( - offspring_ll, - c( - list( - chains = calc_sizes, - offspring_dist = offspring_dist, - statistic = statistic, - stat_max = stat_max - ), - pars + do.call( + offspring_ll, + c( + list( + chains = calc_sizes, + offspring_dist = offspring_dist, + statistic = statistic, + stat_max = stat_max + ), + pars + ) ) - ) } ## assign probabilities to stat_max outbreak sizes @@ -115,7 +116,7 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, ## transform log-likelihoods into likelihoods if required if (!log) { - chains_likelihood <- lapply(chains_likelihood, function(ll) exp(ll)) + chains_likelihood <- lapply(chains_likelihood, exp) } ## if individual == FALSE, return the joint log-likelihood @@ -124,7 +125,7 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, if (!individual) { if (log) { chains_likelihood <- vapply(chains_likelihood, sum, 0) - } else{ + } else { chains_likelihood <- vapply(chains_likelihood, prod, 0) } } From 70247ba65d19fbf1acc9d06730d2bf7f3d235f14 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 17:23:01 +0100 Subject: [PATCH 27/57] Change type coersion to use explicit logical checks --- R/likelihood.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index d381cbe7..6e9fe7f7 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -115,15 +115,15 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, }) ## transform log-likelihoods into likelihoods if required - if (!log) { + if (!isTRUE(log)) { chains_likelihood <- lapply(chains_likelihood, exp) } ## if individual == FALSE, return the joint log-likelihood ## (sum of the log-likelihoods), if log == TRUE, else ## multiply the likelihoods - if (!individual) { - if (log) { + if (!isTRUE(individual)) { + if (isTRUE(log)) { chains_likelihood <- vapply(chains_likelihood, sum, 0) } else { chains_likelihood <- vapply(chains_likelihood, prod, 0) From 1dd3303398df0cec6817e5c671cfb238dcbb2637 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 17:24:27 +0100 Subject: [PATCH 28/57] Correct length distribution function doc --- R/stat_likelihoods.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 94428885..8937811c 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -54,7 +54,7 @@ gborel_size_ll <- function(x, size, prob, mu) { #' Log-likelihood of the length of chains with Poisson offspring distribution #' -#' @param x vector of sizes +#' @param x vector of lengths #' @param lambda rate of the Poisson distribution #' @return log-likelihood values #' @author Sebastian Funk @@ -72,7 +72,7 @@ pois_length_ll <- function(x, lambda) { #' Log-likelihood of the length of chains with geometric offspring distribution #' -#' @param x vector of sizes +#' @param x vector of lengths #' @param prob probability of the geometric distribution with mean #' \code{1/prob} #' @return log-likelihood values From 34865fbfb7f1c9e75f15af6d3479326ebd23a39d Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 17:25:26 +0100 Subject: [PATCH 29/57] Generate length distribution function man files --- man/geom_length_ll.Rd | 2 +- man/pois_length_ll.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/geom_length_ll.Rd b/man/geom_length_ll.Rd index 6c7dc6ad..6397d938 100644 --- a/man/geom_length_ll.Rd +++ b/man/geom_length_ll.Rd @@ -7,7 +7,7 @@ geom_length_ll(x, prob) } \arguments{ -\item{x}{vector of sizes} +\item{x}{vector of lengths} \item{prob}{probability of the geometric distribution with mean \code{1/prob}} diff --git a/man/pois_length_ll.Rd b/man/pois_length_ll.Rd index bf1f47ba..1aa38707 100644 --- a/man/pois_length_ll.Rd +++ b/man/pois_length_ll.Rd @@ -7,7 +7,7 @@ pois_length_ll(x, lambda) } \arguments{ -\item{x}{vector of sizes} +\item{x}{vector of lengths} \item{lambda}{rate of the Poisson distribution} } From 7c9609214876e8dcffbf4f3fedb6eb18e870a24b Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 17:26:10 +0100 Subject: [PATCH 30/57] Use a bigger vector of chains for example --- R/likelihood.R | 3 ++- man/likelihood.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 6e9fe7f7..b7ec5dbf 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -23,7 +23,8 @@ #' @author Sebastian Funk #' @examples #' # example of observed chain sizes -#' chain_sizes <- c(1, 1, 4, 7) +#' set.seed(121) +#' chain_sizes <- sample(1:10, 20, replace = TRUE) #' likelihood( #' chains = chain_sizes, statistic = "size", #' offspring_dist = "pois", nsim_obs = 100, lambda = 0.5 diff --git a/man/likelihood.Rd b/man/likelihood.Rd index c8d8ff2e..66213269 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -65,7 +65,8 @@ Estimate the log-likelihood/likelihood for observed branching processes } \examples{ # example of observed chain sizes -chain_sizes <- c(1, 1, 4, 7) +set.seed(121) +chain_sizes <- sample(1:10, 20, replace = TRUE) likelihood( chains = chain_sizes, statistic = "size", offspring_dist = "pois", nsim_obs = 100, lambda = 0.5 From 0090a84757bfce71e709db76fdd4dacb740a1973 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 17:26:23 +0100 Subject: [PATCH 31/57] Remove old tests --- tests/testthat/tests-ll.r | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 tests/testthat/tests-ll.r diff --git a/tests/testthat/tests-ll.r b/tests/testthat/tests-ll.r deleted file mode 100644 index 13a7c339..00000000 --- a/tests/testthat/tests-ll.r +++ /dev/null @@ -1,10 +0,0 @@ -chains <- c(1, 1, 4, 7) -test_that("Analytical size or length distributions are implemented", { - expect_true(all(pois_size_ll(chains, lambda = 0.5) < 0)) - expect_true(all(nbinom_size_ll(chains, mu = 0.5, size = 0.2) < 0)) - expect_true(all(nbinom_size_ll(chains, prob = 0.5, size = 0.2) < 0)) - expect_true(all(gborel_size_ll(chains, prob = 0.5, size = 0.2) < 0)) - expect_true(all(gborel_size_ll(chains, prob = 0.5, size = 0.2) < 0)) - expect_true(all(pois_length_ll(chains, lambda = 0.5) < 0)) - expect_true(all(geom_length_ll(chains, prob = 0.5) < 0)) -}) From c78b2d9157d1dbec4ae82555367b238f90d2f521 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 17:26:55 +0100 Subject: [PATCH 32/57] Add tests for likelihood function --- tests/testthat/test-likelihood.R | 150 +++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 tests/testthat/test-likelihood.R diff --git a/tests/testthat/test-likelihood.R b/tests/testthat/test-likelihood.R new file mode 100644 index 00000000..c277ad89 --- /dev/null +++ b/tests/testthat/test-likelihood.R @@ -0,0 +1,150 @@ +chains <- c(1, 1, 4, 7) +test_that( + "Likelihoods can be calculated", + { + expect_lt( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + lambda = 0.5 + ), + 0 + ) + expect_lt( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + lambda = 0.5, + exclude = 1 + ), + 0 + ) + expect_lt( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + lambda = 0.5, + stat_max = 5 + ), + 0 + ) + expect_lt( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + lambda = 0.5, + obs_prob = 0.5, + nsim_obs = 1 + ), + 0 + ) + expect_lt( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + lambda = 0.5, + stat_max = 5, + obs_prob = 0.5, + nsim_obs = 1 + ), + 0 + ) + } +) + +test_that("Likelihoods are numerically correct", { + expect_identical( + round( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + lambda = 0.5 + ), 5 + ), + -8.6072 + ) + expect_identical( + round( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "nbinom", + mu = 0.5, + size = 0.2 + ), 5 + ), + -9.13437 + ) + expect_identical( + round( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "gborel", + prob = 0.5, + size = 0.2 + ), 5 + ), + -11.21929 + ) + expect_identical( + round( + likelihood( + chains = chains, + statistic = "length", + offspring_dist = "pois", + lambda = 0.5 + ), 5 + ), + -9.39945 + ) + expect_identical( + round( + likelihood( + chains = chains, + statistic = "length", + offspring_dist = "geom", + prob = 0.5 + ), 5 + ), + -12.48639 + ) +}) + +test_that("Errors are thrown", { + expect_error( + likelihood( + chains = chains, + offspring_dist = list(), + statistic = "size", + lambda = 0.5 + ), + "must be specified as a character string" + ) + expect_error( + likelihood( + chains = chains, + offspring_dist = "pois", + statistic = "size", + lambda = 0.5, + obs_prob = 3 + ), + "must be between 0 and 1" + ) + expect_error( + likelihood( + chains = chains, + offspring_dist = "pois", + statistic = "size", + lambda = 0.5, + obs_prob = 0.5 + ), + "must be specified" + ) +}) From 2328143eeabd844a346f1ce459b3f41bb42931c7 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 17:27:32 +0100 Subject: [PATCH 33/57] Add tests for stat log-likelihood functions --- tests/testthat/test-stat_likelihoods.R | 116 +++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 tests/testthat/test-stat_likelihoods.R diff --git a/tests/testthat/test-stat_likelihoods.R b/tests/testthat/test-stat_likelihoods.R new file mode 100644 index 00000000..2f9783d9 --- /dev/null +++ b/tests/testthat/test-stat_likelihoods.R @@ -0,0 +1,116 @@ +set.seed(1231) +chains <- c(1, 1, 4, 7) +test_that("Analytical chain size distributions are numerically correct", { + expect_identical( + round( + nbinom_size_ll( + x = chains, + mu = 0.5, + size = 0.2 + ), + 5 + ), + c(-0.25055, -0.25055, -3.79542, -4.83785) + ) + expect_identical( + round( + nbinom_size_ll( + x = chains, + prob = 0.5, + size = 0.2 + ), + 5 + ), + c(-0.13863, -0.13863, -4.41775, -6.19443) + ) + expect_identical( + round( + gborel_size_ll( + x = chains, + mu = 0.5, + size = 0.2 + ), + 5 + ), + c(-0.25055, -0.25055, -4.58222, -5.83390) + ) + expect_identical( + round( + gborel_size_ll( + x = chains, + prob = 0.5, + size = 0.2 + ), + 5 + ), + c(-0.13863, -0.13863, -4.80803, -6.13400) + ) +}) + +test_that("Analytical chain lengths distributions are numerically correct", { + expect_identical( + round( + pois_length_ll( + x = chains, + lambda = 0.5 + ), + 5 + ), + c(-0.50000, -0.50000, -3.13243, -5.26702) + ) + expect_identical( + round( + geom_length_ll( + x = chains, + prob = 0.5 + ), + 5 + ), + c(-1.09861, -1.09861, -4.06260, -6.22657) + ) +}) + +test_that("Generic offspring log-likelihoods are calculated", { + expect_true( + all( + offspring_ll( + chains = chains, + offspring_dist = "pois", + nsim_offspring = 100, + statistic = "size", + lambda = 0.82 + ) < 0 + ) + ) + expect_length( + offspring_ll( + chains = chains, + offspring_dist = "pois", + nsim_offspring = 100, + statistic = "size", + lambda = 0.82 + ), + 100 + ) +}) + +test_that("Errors are thrown", { + expect_error( + nbinom_size_ll( + x = chains, + mu = 0.5, + size = 0.2, + prob = 0.1 + ), + "both specified" + ) + expect_error( + gborel_size_ll( + x = chains, + mu = 0.5, + size = 0.2, + prob = 0.1 + ), + "both specified" + ) +}) From d36f76c0b4c37c3079681a8e09ab2438b217ddc3 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 20:25:19 +0100 Subject: [PATCH 34/57] Use the right stat vector for the log-likelihood calculation --- R/stat_likelihoods.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index 8937811c..d23fdb84 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -115,7 +115,7 @@ geom_length_ll <- function(x, prob) { offspring_ll <- function(chains, offspring_dist, statistic, nsim_offspring = 100, ...) { # Simulate the chains - chains <- simulate_summary( + dist <- simulate_summary( nchains = nsim_offspring, offspring_dist = offspring_dist, statistic = statistic, @@ -124,13 +124,13 @@ offspring_ll <- function(chains, offspring_dist, statistic, # Compute the empirical Cumulative Distribution Function of the # simulated chains - chains_empirical_cdf <- stats::ecdf(chains) + chains_empirical_cdf <- stats::ecdf(dist) # Perform a lagged linear interpolation of the points acdf <- diff(c(0, stats::approx( - unique(chains), chains_empirical_cdf(unique(chains)), - seq_len(max(chains[is.finite(chains)])) + unique(dist), chains_empirical_cdf(unique(dist)), + seq_len(max(dist[is.finite(dist)])) )$y)) lik <- acdf[chains] lik[is.na(lik)] <- 0 From b14e0e26800cd95d6a3bb9ad7e517d86a5fa9c46 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 20:36:57 +0100 Subject: [PATCH 35/57] Lint --- R/stat_likelihoods.R | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index d23fdb84..cb0ba039 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -127,11 +127,18 @@ offspring_ll <- function(chains, offspring_dist, statistic, chains_empirical_cdf <- stats::ecdf(dist) # Perform a lagged linear interpolation of the points - acdf <- - diff(c(0, stats::approx( - unique(dist), chains_empirical_cdf(unique(dist)), - seq_len(max(dist[is.finite(dist)])) - )$y)) + acdf <- diff( + c( + 0, + stats::approx( + unique(dist), + chains_empirical_cdf(unique(dist)), + seq_len( + max(dist[is.finite(dist)]) + ) + )$y + ) + ) lik <- acdf[chains] lik[is.na(lik)] <- 0 out <- log(lik) From 09b0013b86dd8456b3de9cff75fb48e59a67abcb Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Thu, 7 Sep 2023 20:37:10 +0100 Subject: [PATCH 36/57] Fix tests --- tests/testthat/test-stat_likelihoods.R | 2 +- tests/testthat/tests-sim.r | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-stat_likelihoods.R b/tests/testthat/test-stat_likelihoods.R index 2f9783d9..a63a53df 100644 --- a/tests/testthat/test-stat_likelihoods.R +++ b/tests/testthat/test-stat_likelihoods.R @@ -90,7 +90,7 @@ test_that("Generic offspring log-likelihoods are calculated", { statistic = "size", lambda = 0.82 ), - 100 + 4 ) }) diff --git a/tests/testthat/tests-sim.r b/tests/testthat/tests-sim.r index 67386a95..05b7b813 100644 --- a/tests/testthat/tests-sim.r +++ b/tests/testthat/tests-sim.r @@ -21,7 +21,7 @@ test_that("Simulators output epichains objects", { ) expect_s3_class( simulate_summary( - n = 10, + nchains = 10, offspring_dist = "pois", lambda = 2, stat_max = 10 From 7c6be4c923da831ecb4dcef304f0bccb42ef1562 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Mon, 11 Sep 2023 19:25:11 +0100 Subject: [PATCH 37/57] Validate the log argument --- R/likelihood.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/likelihood.R b/R/likelihood.R index b7ec5dbf..8e5ff0e8 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -37,6 +37,12 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, ## checks check_offspring_valid(offspring_dist) + checkmate::assert_logical( + log, + any.missing = FALSE, + all.missing = FALSE, + len = 1 + ) if (obs_prob <= 0 || obs_prob > 1) { stop("'obs_prob' is a probability and must be between 0 and 1 inclusive") From de5800db267da01468ae9a3f5db54fc7bf217da1 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Mon, 11 Sep 2023 19:25:46 +0100 Subject: [PATCH 38/57] Add more tests of the likelihood() function to improve coverage --- tests/testthat/test-likelihood.R | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/testthat/test-likelihood.R b/tests/testthat/test-likelihood.R index c277ad89..ac2750ec 100644 --- a/tests/testthat/test-likelihood.R +++ b/tests/testthat/test-likelihood.R @@ -54,6 +54,39 @@ test_that( ), 0 ) + expect_lt( + likelihood( + chains = chains, + statistic = "length", + offspring_dist = "binom", + size = 1, + prob = 0.5 + ), + 0 + ) + expect_gte( + likelihood( + chains = chains, + statistic = "length", + offspring_dist = "binom", + size = 1, + prob = 0.5, + log = FALSE + ), + 0 + ) + expect_gte( + likelihood( + chains = chains, + statistic = "length", + offspring_dist = "binom", + size = 1, + prob = 0.5, + individual = FALSE, + log = FALSE + ), + 0 + ) } ) @@ -147,4 +180,15 @@ test_that("Errors are thrown", { ), "must be specified" ) + expect_error( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + nsim_obs = 100, + lambda = 0.5, + log = "s" + ), + "Must be of type 'logical'" + ) }) From de970658fe07e5d111d6273a2eb785d5e0c07311 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Mon, 11 Sep 2023 19:26:07 +0100 Subject: [PATCH 39/57] Add a test for construct_offspring_ll_name() --- tests/testthat/test-helpers.R | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/testthat/test-helpers.R diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R new file mode 100644 index 00000000..3745e5f9 --- /dev/null +++ b/tests/testthat/test-helpers.R @@ -0,0 +1,9 @@ +test_that("Helper functions work correctly", { + expect_equal( + construct_offspring_ll_name( + offspring_dist = "pois", + chain_statistic = "size" + ), + "pois_size_ll" + ) +}) From c91681faeb02b187a95cb6b701d8239195031b93 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Mon, 11 Sep 2023 19:47:10 +0100 Subject: [PATCH 40/57] Fixed linting issues with the tests --- tests/testthat/test-helpers.R | 4 ++-- tests/testthat/test-likelihood.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index 3745e5f9..1fbc99d3 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -1,9 +1,9 @@ test_that("Helper functions work correctly", { - expect_equal( + expect_identical( construct_offspring_ll_name( offspring_dist = "pois", chain_statistic = "size" - ), + ), "pois_size_ll" ) }) diff --git a/tests/testthat/test-likelihood.R b/tests/testthat/test-likelihood.R index ac2750ec..cc41654b 100644 --- a/tests/testthat/test-likelihood.R +++ b/tests/testthat/test-likelihood.R @@ -188,7 +188,7 @@ test_that("Errors are thrown", { nsim_obs = 100, lambda = 0.5, log = "s" - ), - "Must be of type 'logical'" + ), + "Must be of type" ) }) From bcd542af2f20dd6ff669836dc2b492db08d55deb Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 13:12:09 +0100 Subject: [PATCH 41/57] Remove default value of nsim_obs argument --- R/likelihood.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/likelihood.R b/R/likelihood.R index 8e5ff0e8..d4ddef79 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -31,7 +31,7 @@ #' ) #' @export likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, - nsim_obs = 100, log = TRUE, obs_prob = 1, stat_max = Inf, + nsim_obs, log = TRUE, obs_prob = 1, stat_max = Inf, exclude = NULL, individual = FALSE, ...) { statistic <- match.arg(statistic) From 1e017b58fdfbdc605e3639493b8fa25a2a546062 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 14:16:07 +0100 Subject: [PATCH 42/57] Reinstate control to overwrite stat_max when specified as Inf --- R/likelihood.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/likelihood.R b/R/likelihood.R index d4ddef79..5976f038 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -62,7 +62,9 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, stat_max ), simplify = FALSE) size_x <- unlist(sampled_x) + if (!is.finite(stat_max)) { stat_max <- max(size_x) + 1 + } } else { chains[chains >= stat_max] <- stat_max size_x <- chains From ce01a876942e96634fd53804d3ab0294e3cdb09a Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 17:59:14 +0100 Subject: [PATCH 43/57] Inherit dot params from offspring_ll instead of simulate_summary --- R/likelihood.R | 2 +- man/likelihood.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 5976f038..45dd8585 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -1,7 +1,7 @@ #' Estimate the log-likelihood/likelihood for observed branching processes #' -#' @inheritParams simulate_summary #' @inheritParams offspring_ll +#' @inheritParams simulate_summary #' @param nsim_obs Number of simulations if the log-likelihood/likelihood is to #' be approximated for imperfect observations. #' @param log Logical; Should the log-likelihoods be transformed to diff --git a/man/likelihood.Rd b/man/likelihood.Rd index 66213269..2e25c133 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -48,7 +48,7 @@ log-likelihood calculation.} \item{individual}{If TRUE, a vector of individual log-likelihood/likelihood contributions will be returned rather than the sum.} -\item{...}{Parameters of the offspring distribution as required by R.} +\item{...}{any parameters to pass to \code{\link{simulate_summary}}} } \value{ \itemize{ From 5c43a846626d65bb2c9eed2f71ce84d7f7aacc39 Mon Sep 17 00:00:00 2001 From: James Azam <james.m.azam@gmail.com> Date: Tue, 12 Sep 2023 13:13:27 +0100 Subject: [PATCH 44/57] Add a comment to example Co-authored-by: Sebastian Funk <sebastian.funk@lshtm.ac.uk> --- R/likelihood.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/likelihood.R b/R/likelihood.R index 45dd8585..100940d3 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -24,6 +24,7 @@ #' @examples #' # example of observed chain sizes #' set.seed(121) +#' ## randomly generate 20 chains of size 1 to 10 #' chain_sizes <- sample(1:10, 20, replace = TRUE) #' likelihood( #' chains = chain_sizes, statistic = "size", From 0a4fd60a768ddbb46d81df47703b6d0e6071e58c Mon Sep 17 00:00:00 2001 From: James Azam <james.m.azam@gmail.com> Date: Tue, 12 Sep 2023 14:19:06 +0100 Subject: [PATCH 45/57] Replace !isTRUE to isFALSE Co-authored-by: Sebastian Funk <sebastian.funk@lshtm.ac.uk> --- R/likelihood.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 100940d3..50704b37 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -125,14 +125,14 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, }) ## transform log-likelihoods into likelihoods if required - if (!isTRUE(log)) { + if (isFALSE(log)) { chains_likelihood <- lapply(chains_likelihood, exp) } ## if individual == FALSE, return the joint log-likelihood ## (sum of the log-likelihoods), if log == TRUE, else ## multiply the likelihoods - if (!isTRUE(individual)) { + if (isFALSE(individual)) { if (isTRUE(log)) { chains_likelihood <- vapply(chains_likelihood, sum, 0) } else { From c29f0ffbf0be63a57c463d6bfb186351b0dbb5bd Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 20:47:52 +0100 Subject: [PATCH 46/57] Generate doc for removed default value of nsim_obs --- man/likelihood.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/likelihood.Rd b/man/likelihood.Rd index 2e25c133..0a736029 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -8,7 +8,7 @@ likelihood( chains, statistic = c("size", "length"), offspring_dist, - nsim_obs = 100, + nsim_obs, log = TRUE, obs_prob = 1, stat_max = Inf, From 3d4282bfe9a45f77abf54ea8bd634e1298b1d288 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 20:48:25 +0100 Subject: [PATCH 47/57] Rename variables --- R/likelihood.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 50704b37..be3cb0e5 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -55,29 +55,29 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, statistic_func <- get_statistic_func(statistic) - sampled_x <- replicate(nsim_obs, pmin( + stat_rep_list <- replicate(nsim_obs, pmin( statistic_func( length(chains), chains, obs_prob ), stat_max ), simplify = FALSE) - size_x <- unlist(sampled_x) + stat_rep_vect <- unlist(stat_rep_list) if (!is.finite(stat_max)) { - stat_max <- max(size_x) + 1 + stat_max <- max(stat_rep_vect) + 1 } } else { chains[chains >= stat_max] <- stat_max - size_x <- chains - sampled_x <- list(chains) + stat_rep_vect <- chains + stat_rep_list <- list(chains) } ## determine for which sizes to calculate the log-likelihood ## (for true chain size) - if (any(size_x == stat_max)) { + if (any(stat_rep_vect == stat_max)) { calc_sizes <- seq_len(stat_max - 1) } else { - calc_sizes <- unique(c(size_x, exclude)) + calc_sizes <- unique(c(stat_rep_vect, exclude)) } ## get log-likelihood function as given by offspring_dist and statistic @@ -106,7 +106,7 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, } ## assign probabilities to stat_max outbreak sizes - if (any(size_x == stat_max)) { + if (any(stat_rep_vect == stat_max)) { likelihoods[stat_max] <- complementary_logprob(likelihoods) } @@ -114,13 +114,13 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, likelihoods <- likelihoods - log(-expm1(sum(likelihoods[exclude]))) likelihoods[exclude] <- -Inf - sampled_x <- lapply(sampled_x, function(y) { + stat_rep_list <- lapply(stat_rep_list, function(y) { y[!(y %in% exclude)] }) } ## assign likelihoods - chains_likelihood <- lapply(sampled_x, function(sx) { + chains_likelihood <- lapply(stat_rep_list, function(sx) { likelihoods[sx[!(sx %in% exclude)]] }) From d344a27d273be586ad780b91ac6c90e5fc4ce84e Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 20:49:27 +0100 Subject: [PATCH 48/57] Revise documentation of likelihood function --- R/likelihood.R | 24 ++++++++++++++++-------- man/likelihood.Rd | 22 +++++++++++++++------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index be3cb0e5..bb5c4404 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -10,21 +10,29 @@ #' @param exclude A vector of indices of the sizes/lengths to exclude from the #' log-likelihood calculation. #' @param individual If TRUE, a vector of individual log-likelihood/likelihood -#' contributions will be returned rather than the sum. +#' contributions will be returned rather than the sum/product. #' @return -#' * A vector of log-likelihoods, if \code{log = TRUE} (the default) and -#' \code{obs_prob < 1}, or -#' * A list of individual log-likelihood contributions, if -#' \code{log = TRUE} (the default) and \code{individual = TRUE}. -#' The interpretation follows for the other combinations of `log` and -#' `individual`. +#' If log = TRUE: +#' +#' * A joint log-likelihood (sum of individual log-likelihoods), if +#' \code{individual == FALSE} (default) and \code{obs_prob = 1} (default), or +#' * A list of individual log-likelihoods, if \code{individual == TRUE} and +#' \code{obs_prob = 1} (default), or +#' * A list of individual log-likelihoods (same length as `nsim_obs`), if +#' \code{individual == TRUE} and \code{0 <= obs_prob < 1}, or +#' * A vector of joint log-likelihoods (same length as `nsim_obs`), if +#' individual == FALSE and \code{0 <= obs_prob < 1} (imperfect observation). +#' +#' If \code{log = FALSE}, likelihoods, instead of log-likelihoods, are returned +#' and the joint likelihoods are the product, instead of the sum, of the +#' individual likelihoods. #' @seealso offspring_ll(), pois_size_ll(), nbinom_size_ll(), gborel_size_ll(), #' pois_length_ll(), geom_length_ll() #' @author Sebastian Funk #' @examples #' # example of observed chain sizes #' set.seed(121) -#' ## randomly generate 20 chains of size 1 to 10 +#' # randomly generate 20 chains of size 1 to 10 #' chain_sizes <- sample(1:10, 20, replace = TRUE) #' likelihood( #' chains = chain_sizes, statistic = "size", diff --git a/man/likelihood.Rd b/man/likelihood.Rd index 0a736029..4173e3a4 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -46,19 +46,26 @@ computed. Results above the specified value, are set to \code{Inf}.} log-likelihood calculation.} \item{individual}{If TRUE, a vector of individual log-likelihood/likelihood -contributions will be returned rather than the sum.} +contributions will be returned rather than the sum/product.} \item{...}{any parameters to pass to \code{\link{simulate_summary}}} } \value{ +If log = TRUE: \itemize{ -\item A vector of log-likelihoods, if \code{log = TRUE} (the default) and -\code{obs_prob < 1}, or -\item A list of individual log-likelihood contributions, if -\code{log = TRUE} (the default) and \code{individual = TRUE}. -The interpretation follows for the other combinations of \code{log} and -\code{individual}. +\item A joint log-likelihood (sum of individual log-likelihoods), if +\code{individual == FALSE} (default) and \code{obs_prob = 1} (default), or +\item A list of individual log-likelihoods, if \code{individual == TRUE} and +\code{obs_prob = 1} (default), or +\item A list of individual log-likelihoods (same length as \code{nsim_obs}), if +\code{individual == TRUE} and \code{0 <= obs_prob < 1}, or +\item A vector of joint log-likelihoods (same length as \code{nsim_obs}), if +individual == FALSE and \code{0 <= obs_prob < 1} (imperfect observation). } + +If \code{log = FALSE}, likelihoods, instead of log-likelihoods, are returned +and the joint likelihoods are the product, instead of the sum, of the +individual likelihoods. } \description{ Estimate the log-likelihood/likelihood for observed branching processes @@ -66,6 +73,7 @@ Estimate the log-likelihood/likelihood for observed branching processes \examples{ # example of observed chain sizes set.seed(121) +# randomly generate 20 chains of size 1 to 10 chain_sizes <- sample(1:10, 20, replace = TRUE) likelihood( chains = chain_sizes, statistic = "size", From 72522fe7f18983a14b4f9f2654bee2579da63919 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 21:01:48 +0100 Subject: [PATCH 49/57] Rename chains to x --- R/stat_likelihoods.R | 8 ++++---- man/offspring_ll.Rd | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/stat_likelihoods.R b/R/stat_likelihoods.R index cb0ba039..148ffbf6 100644 --- a/R/stat_likelihoods.R +++ b/R/stat_likelihoods.R @@ -94,7 +94,7 @@ geom_length_ll <- function(x, prob) { #' cumulative distribution function (ecdf). #' @inheritParams likelihood #' @inheritParams simulate_summary -#' @param chains Vector of chain summaries (sizes/lengths) +#' @param x Vector of chain summaries (sizes/lengths) #' @param nsim_offspring Number of simulations of the offspring distribution #' for approximating the distribution of the chain statistic summary #' (size/length) @@ -107,12 +107,12 @@ geom_length_ll <- function(x, prob) { #' @examples #' set.seed(123) #' chain_size_ll <- offspring_ll( -#' chains = c(1, 5, 6, 8, 7, 8, 10), +#' x = c(1, 5, 6, 8, 7, 8, 10), #' offspring_dist = "pois", #' statistic = "size", #' lambda = 0.82 #' ) -offspring_ll <- function(chains, offspring_dist, statistic, +offspring_ll <- function(x, offspring_dist, statistic, nsim_offspring = 100, ...) { # Simulate the chains dist <- simulate_summary( @@ -139,7 +139,7 @@ offspring_ll <- function(chains, offspring_dist, statistic, )$y ) ) - lik <- acdf[chains] + lik <- acdf[x] lik[is.na(lik)] <- 0 out <- log(lik) return(out) diff --git a/man/offspring_ll.Rd b/man/offspring_ll.Rd index 4aa8d874..53632fee 100644 --- a/man/offspring_ll.Rd +++ b/man/offspring_ll.Rd @@ -5,10 +5,10 @@ \title{Log-likelihood of the summary (size/length) of chains with generic offspring distribution} \usage{ -offspring_ll(chains, offspring_dist, statistic, nsim_offspring = 100, ...) +offspring_ll(x, offspring_dist, statistic, nsim_offspring = 100, ...) } \arguments{ -\item{chains}{Vector of chain summaries (sizes/lengths)} +\item{x}{Vector of chain summaries (sizes/lengths)} \item{offspring_dist}{Offspring distribution: a character string corresponding to the R distribution function (e.g., "pois" for Poisson, @@ -38,7 +38,7 @@ cumulative distribution function (ecdf). \examples{ set.seed(123) chain_size_ll <- offspring_ll( - chains = c(1, 5, 6, 8, 7, 8, 10), + x = c(1, 5, 6, 8, 7, 8, 10), offspring_dist = "pois", statistic = "size", lambda = 0.82 From 795e425e16065f78f12a3ed68f4e0087fe3b4533 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 21:02:26 +0100 Subject: [PATCH 50/57] Document chains argument due to loss of inheritance --- R/likelihood.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/likelihood.R b/R/likelihood.R index bb5c4404..01f1f78a 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -2,6 +2,7 @@ #' #' @inheritParams offspring_ll #' @inheritParams simulate_summary +#' @param chains Vector of chain summaries (sizes/lengths) #' @param nsim_obs Number of simulations if the log-likelihood/likelihood is to #' be approximated for imperfect observations. #' @param log Logical; Should the log-likelihoods be transformed to From c467946048f5fa34443c36875566a40ab6c24e4d Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 21:02:37 +0100 Subject: [PATCH 51/57] Linting --- R/likelihood.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/likelihood.R b/R/likelihood.R index 01f1f78a..634ecf1c 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -73,7 +73,7 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, ), simplify = FALSE) stat_rep_vect <- unlist(stat_rep_list) if (!is.finite(stat_max)) { - stat_max <- max(stat_rep_vect) + 1 + stat_max <- max(stat_rep_vect) + 1 } } else { chains[chains >= stat_max] <- stat_max From a45788cc234d0580bb4b272081a2636a528eca1b Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 21:02:57 +0100 Subject: [PATCH 52/57] Revise return value documentation --- R/likelihood.R | 9 +++++---- man/likelihood.Rd | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 634ecf1c..23c5552d 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -13,7 +13,7 @@ #' @param individual If TRUE, a vector of individual log-likelihood/likelihood #' contributions will be returned rather than the sum/product. #' @return -#' If log = TRUE: +#' If \code{log = TRUE} #' #' * A joint log-likelihood (sum of individual log-likelihoods), if #' \code{individual == FALSE} (default) and \code{obs_prob = 1} (default), or @@ -24,9 +24,10 @@ #' * A vector of joint log-likelihoods (same length as `nsim_obs`), if #' individual == FALSE and \code{0 <= obs_prob < 1} (imperfect observation). #' -#' If \code{log = FALSE}, likelihoods, instead of log-likelihoods, are returned -#' and the joint likelihoods are the product, instead of the sum, of the -#' individual likelihoods. +#' If \code{log = FALSE}, the same structure of outputs as above are returned, +#' except that likelihoods, instead of log-likelihoods, are calculated in all +#' cases. Moreover, the joint likelihoods are the product, instead of the sum, +#' of the individual likelihoods. #' @seealso offspring_ll(), pois_size_ll(), nbinom_size_ll(), gborel_size_ll(), #' pois_length_ll(), geom_length_ll() #' @author Sebastian Funk diff --git a/man/likelihood.Rd b/man/likelihood.Rd index 4173e3a4..6c69b974 100644 --- a/man/likelihood.Rd +++ b/man/likelihood.Rd @@ -51,7 +51,7 @@ contributions will be returned rather than the sum/product.} \item{...}{any parameters to pass to \code{\link{simulate_summary}}} } \value{ -If log = TRUE: +If \code{log = TRUE} \itemize{ \item A joint log-likelihood (sum of individual log-likelihoods), if \code{individual == FALSE} (default) and \code{obs_prob = 1} (default), or @@ -63,9 +63,10 @@ If log = TRUE: individual == FALSE and \code{0 <= obs_prob < 1} (imperfect observation). } -If \code{log = FALSE}, likelihoods, instead of log-likelihoods, are returned -and the joint likelihoods are the product, instead of the sum, of the -individual likelihoods. +If \code{log = FALSE}, the same structure of outputs as above are returned, +except that likelihoods, instead of log-likelihoods, are calculated in all +cases. Moreover, the joint likelihoods are the product, instead of the sum, +of the individual likelihoods. } \description{ Estimate the log-likelihood/likelihood for observed branching processes From fe95b8f946be72232a8b1aae84a7a62b3ed9df6b Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 21:43:06 +0100 Subject: [PATCH 53/57] Update offspring_ll calls to use x instead of chains --- R/likelihood.R | 2 +- tests/testthat/test-stat_likelihoods.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 23c5552d..4bd41ce1 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -105,7 +105,7 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, offspring_ll, c( list( - chains = calc_sizes, + x = calc_sizes, offspring_dist = offspring_dist, statistic = statistic, stat_max = stat_max diff --git a/tests/testthat/test-stat_likelihoods.R b/tests/testthat/test-stat_likelihoods.R index a63a53df..5de37753 100644 --- a/tests/testthat/test-stat_likelihoods.R +++ b/tests/testthat/test-stat_likelihoods.R @@ -74,7 +74,7 @@ test_that("Generic offspring log-likelihoods are calculated", { expect_true( all( offspring_ll( - chains = chains, + x = chains, offspring_dist = "pois", nsim_offspring = 100, statistic = "size", @@ -84,7 +84,7 @@ test_that("Generic offspring log-likelihoods are calculated", { ) expect_length( offspring_ll( - chains = chains, + x = chains, offspring_dist = "pois", nsim_offspring = 100, statistic = "size", From fa8a255fca38303e039987fc49481f7f1fd5b6b9 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 21:43:22 +0100 Subject: [PATCH 54/57] Add more tests --- tests/testthat/test-stat_likelihoods.R | 61 ++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/testthat/test-stat_likelihoods.R b/tests/testthat/test-stat_likelihoods.R index 5de37753..bfc81e8d 100644 --- a/tests/testthat/test-stat_likelihoods.R +++ b/tests/testthat/test-stat_likelihoods.R @@ -45,6 +45,16 @@ test_that("Analytical chain size distributions are numerically correct", { ), c(-0.13863, -0.13863, -4.80803, -6.13400) ) + expect_identical( + round( + pois_size_ll( + x = chains, + lambda = 0.2 + ), + 5 + ), + c(-0.20000, -0.20000, -4.64748, -7.90633) + ) }) test_that("Analytical chain lengths distributions are numerically correct", { @@ -114,3 +124,54 @@ test_that("Errors are thrown", { "both specified" ) }) + +test_that("Likelihood function returns the right object classes", { + expect_type( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + nsim_obs = 100, + lambda = 0.5, + obs_prob = 0.5, + individual = TRUE + ), + "list" + ) + expect_type( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + nsim_obs = 3, + lambda = 0.5, + obs_prob = 0.5, + individual = FALSE + ), + "double" + ) + expect_type( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + nsim_obs = 3, + lambda = 0.5, + obs_prob = 1, + individual = TRUE + ), + "list" + ) + expect_type( + likelihood( + chains = chains, + statistic = "size", + offspring_dist = "pois", + nsim_obs = 100, + lambda = 0.5, + obs_prob = 1, + individual = FALSE + ), + "double" + ) +}) From 0e659873cfe8a27e3ad6bb4ce96a2e1e564babd1 Mon Sep 17 00:00:00 2001 From: James Azam <james.m.azam@gmail.com> Date: Tue, 12 Sep 2023 22:22:23 +0100 Subject: [PATCH 55/57] Abbreviate code block with ifelse + vapply construct Co-authored-by: Sebastian Funk <sebastian.funk@lshtm.ac.uk> --- R/likelihood.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index 4bd41ce1..79ed452a 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -143,11 +143,8 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, ## (sum of the log-likelihoods), if log == TRUE, else ## multiply the likelihoods if (isFALSE(individual)) { - if (isTRUE(log)) { - chains_likelihood <- vapply(chains_likelihood, sum, 0) - } else { - chains_likelihood <- vapply(chains_likelihood, prod, 0) - } + summarise_func <- ifelse(log, sum, prod) + vapply(chains_likelihood, summarise_func, 0) } return(chains_likelihood) From 564e2a9db044ca01bf20f23078983b535f0824b6 Mon Sep 17 00:00:00 2001 From: jamesaazam <james.azam@lshtm.ac.uk> Date: Tue, 12 Sep 2023 23:13:48 +0100 Subject: [PATCH 56/57] Assign final value to return the right copy --- R/likelihood.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/likelihood.R b/R/likelihood.R index 79ed452a..ab72099f 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -144,7 +144,7 @@ likelihood <- function(chains, statistic = c("size", "length"), offspring_dist, ## multiply the likelihoods if (isFALSE(individual)) { summarise_func <- ifelse(log, sum, prod) - vapply(chains_likelihood, summarise_func, 0) + chains_likelihood <- vapply(chains_likelihood, summarise_func, 0) } return(chains_likelihood) From 9e601a1dbc808390cce05de2fcb03a85278613bf Mon Sep 17 00:00:00 2001 From: James Azam <james.m.azam@gmail.com> Date: Wed, 13 Sep 2023 10:16:56 +0100 Subject: [PATCH 57/57] Replace = with == for consitency. Co-authored-by: Sebastian Funk <sebastian.funk@lshtm.ac.uk> --- R/likelihood.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/likelihood.R b/R/likelihood.R index ab72099f..63b33af1 100644 --- a/R/likelihood.R +++ b/R/likelihood.R @@ -16,9 +16,9 @@ #' If \code{log = TRUE} #' #' * A joint log-likelihood (sum of individual log-likelihoods), if -#' \code{individual == FALSE} (default) and \code{obs_prob = 1} (default), or +#' \code{individual == FALSE} (default) and \code{obs_prob == 1} (default), or #' * A list of individual log-likelihoods, if \code{individual == TRUE} and -#' \code{obs_prob = 1} (default), or +#' \code{obs_prob == 1} (default), or #' * A list of individual log-likelihoods (same length as `nsim_obs`), if #' \code{individual == TRUE} and \code{0 <= obs_prob < 1}, or #' * A vector of joint log-likelihoods (same length as `nsim_obs`), if