From 83e39b47cd7e3700e98c8d6797b63dafd5d22f5a Mon Sep 17 00:00:00 2001 From: cschwarz-stat-sfu-ca Date: Sun, 24 Oct 2021 16:00:28 -0700 Subject: [PATCH] Fixed problem when plotting logitP or logU when extreme values present (issue #30). --- CRAN-RELEASE | 4 +- DESCRIPTION | 4 +- NEWS.md | 5 + R/RunTime.R | 2 +- R/TimeStratPetersenDiagErrorWHChinook2_fit.R | 17 +- R/TimeStratPetersenDiagErrorWHChinook_fit.R | 78 ++--- R/TimeStratPetersenDiagErrorWHSteel_fit.R | 53 ++- R/TimeStratPetersenDiagError_fit.R | 67 ++-- ...StratPetersenNonDiagErrorNPMarkAvail_fit.R | 77 ++--- R/TimeStratPetersenNonDiagErrorNP_fit.R | 75 ++--- R/TimeStratPetersenNonDiagError_fit.R | 71 ++-- R/plot_logit.R | 22 +- cran-comments.md | 5 +- man-roxygen/Delta.max.R | 2 + man-roxygen/debug.R | 7 + man-roxygen/jump.after.R | 6 + man-roxygen/prior.beta.logitP.mean.sd.R | 4 + man-roxygen/run.prob.R | 3 + man-roxygen/tauP.alpha.beta.R | 5 + man-roxygen/tauTT.alpha.beta.R | 5 + man-roxygen/tauU.alpha.beta.R | 5 + man-roxygen/taueU.alpha.beta.R | 5 + man-roxygen/trunc.logitP.R | 2 + man-roxygen/u2.D.R | 3 + man-roxygen/u2.ND.R | 3 + ...TimeStratPetersenDiagErrorWHChinook_fit.Rd | 9 +- man/TimeStratPetersenDiagErrorWHSteel_fit.Rd | 6 +- man/TimeStratPetersenDiagError_fit.Rd | 10 +- ...tratPetersenNonDiagErrorNPMarkAvail_fit.Rd | 6 +- man/TimeStratPetersenNonDiagErrorNP_fit.Rd | 6 +- man/TimeStratPetersenNonDiagError_fit.Rd | 6 +- render-vignettes.R | 2 +- vignettes/a-Diagonal-model.html | 309 +++++++++--------- .../b-Diagonal-model-with-multiple-ages.html | 38 +-- vignettes/c-Non-diagonal-model.html | 34 +- .../d-Non-diagonal-with-fall-back-model.html | 24 +- .../e-Bias-from-incomplete-sampling.html | 32 +- ...f-Interpolating-run-earlier-and-later.html | 12 +- 38 files changed, 527 insertions(+), 497 deletions(-) create mode 100644 man-roxygen/Delta.max.R create mode 100644 man-roxygen/debug.R create mode 100644 man-roxygen/jump.after.R create mode 100644 man-roxygen/prior.beta.logitP.mean.sd.R create 
mode 100644 man-roxygen/run.prob.R create mode 100644 man-roxygen/tauP.alpha.beta.R create mode 100644 man-roxygen/tauTT.alpha.beta.R create mode 100644 man-roxygen/tauU.alpha.beta.R create mode 100644 man-roxygen/taueU.alpha.beta.R create mode 100644 man-roxygen/trunc.logitP.R create mode 100644 man-roxygen/u2.D.R create mode 100644 man-roxygen/u2.ND.R diff --git a/CRAN-RELEASE b/CRAN-RELEASE index ed743aa..db924b1 100644 --- a/CRAN-RELEASE +++ b/CRAN-RELEASE @@ -1,2 +1,2 @@ -This package was submitted to CRAN on 2021-10-09. -Once it is accepted, delete this file and tag the release (commit 7d17b50). +This package was submitted to CRAN on 2021-10-24. +Once it is accepted, delete this file and tag the release (commit 1be18a7). diff --git a/DESCRIPTION b/DESCRIPTION index d88dac6..6217628 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BTSPAS -Version: 2021.11.1 -Date: 2021-11-01 +Version: 2021.11.2 +Date: 2021-11-02 Title: Bayesian Time-Stratified Population Analysis Author: Carl J Schwarz and Simon J Bonner diff --git a/NEWS.md b/NEWS.md index 7ff5c4e..1ed43a4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# BTSPAS 2011.11.2 + +* Added trunc.logitP to truncate logit(P) when plotting to avoid problems with extreme cases (issue #30) +* Editorial changes + # BTSPAS 2011.11.1 * Fixed importing sd() from stat package that conflicts with revised sd() in actuar package diff --git a/R/RunTime.R b/R/RunTime.R index d136fd1..75a7341 100644 --- a/R/RunTime.R +++ b/R/RunTime.R @@ -28,7 +28,7 @@ RunTime <- function(time, U, prob=seq(0,1,.1)) { timing <- c(min(time):(1+max(time))) q.U <- plyr::adply(U, 1, function(U.sample, timing){ - quant <- stats::quantile(actuar::grouped.data(Group=timing, Frequency=U.sample), prob=prob) + quant <- stats::quantile(actuar::grouped.data(Group=timing, Frequency=U.sample), prob=prob, na.rm=TRUE) quant }, timing=timing, .id=NULL) q.U diff --git a/R/TimeStratPetersenDiagErrorWHChinook2_fit.R 
b/R/TimeStratPetersenDiagErrorWHChinook2_fit.R index 2226978..4e169c4 100644 --- a/R/TimeStratPetersenDiagErrorWHChinook2_fit.R +++ b/R/TimeStratPetersenDiagErrorWHChinook2_fit.R @@ -1,3 +1,4 @@ +# 2021-10-23 CJS Added trunc.logitP to fix plotting problems with extreme logitP # 2020-12-15 CJS Removed all references to sampfrac in the code # 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP # 2018-12-19 CJS deprecation of sampling fraction @@ -37,12 +38,13 @@ TimeStratPetersenDiagErrorWHChinook2_fit<- run.prob=seq(0,1,.1), # what percentiles of run timing are wanted debug=FALSE, debug2=FALSE, InitialSeed=ceiling(stats::runif(1,min=0,1000000)), - save.output.to.files=TRUE) { + save.output.to.files=TRUE, + trunc.logitP=15) { # Fit a Time Stratified Petersen model with diagonal entries and with smoothing on U allowing for random error, # covariates for the the capture probabilities, and separating the YoY and Age1 wild vs hatchery fish # The "diagonal entries" implies that no marked fish are recaptured outside the (time) stratum of release # - version <- '2021-11-01' + version <- '2021-11-02' options(width=200) # Input parameters are @@ -630,6 +632,14 @@ if (debug) get.est("H.1" ,plot.df, hatch.after.YoY, results), get.est("W.YoY",plot.df, hatch.after.YoY, results), get.est("W.1" ,plot.df, hatch.after.YoY, results)) + + # add limits to the plot to avoid non-monotone secondary axis problems with extreme values + plot.data$logUguess <- pmax(-10 , pmin(20, plot.data$logUguess)) + plot.data$logU <- pmax(-10 , pmin(20, plot.data$logU )) + plot.data$logUlcl <- pmax(-10 , pmin(20, plot.data$logUlcl )) + plot.data$logUucl <- pmax(-10 , pmin(20, plot.data$logUucl )) + plot.data$spline<- pmax(-10 , pmin(20, plot.data$spline)) + fit.plot <- ggplot(data=plot.data, aes_(x=~time, color=~group))+ ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals")+ geom_point(aes_(y=~logUguess), shape=16, position=position_dodge(width=.2))+ # guesses 
for population @@ -658,7 +668,8 @@ results$plots$fit.plot <- fit.plot # plot logit P vs time logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2, - u2=u2.A.YoY+u2.N.YoY+u2.A.1+u2.N.1, logitP.cov=new.logitP.cov, results=results) + u2=u2.A.YoY+u2.N.YoY+u2.A.1+u2.N.1, logitP.cov=new.logitP.cov, results=results, + trunc.logitP=trunc.logitP) if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in") results$plots$logitP.plot <- logitP.plot diff --git a/R/TimeStratPetersenDiagErrorWHChinook_fit.R b/R/TimeStratPetersenDiagErrorWHChinook_fit.R index b2ee902..0307253 100644 --- a/R/TimeStratPetersenDiagErrorWHChinook_fit.R +++ b/R/TimeStratPetersenDiagErrorWHChinook_fit.R @@ -1,3 +1,4 @@ +# 2021-10-23 CJS Added trunc.logitP parameter to avoid plotting problems # 2020-12-15 CJS Removed all uses of sampfrac in the code # 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP # 2018-12-19 CSJ deprecated use of sampling fraction @@ -84,33 +85,15 @@ #' These are set to NA prior to the fit. #' @template logitP.cov #' @template mcmc-parms -#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for -#' the prior for the variance of noise around the spline. -#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for -#' the prior for the variance of noise around the spline. 
-#' @param prior.beta.logitP.mean Mean of the prior normal distribution for -#' logit(catchability) across strata -#' @param prior.beta.logitP.sd SD of the prior normal distribution for -#' logit(catchability) across strata -#' @param tauP.alpha One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param tauP.beta One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param run.prob Numeric vector indicating percentiles of run timing should -#' be computed. -#' @param debug Logical flag indicating if a debugging run should be made. In -#' the debugging run, the number of samples in the posterior is reduced -#' considerably for a quick turn around. -#' @param debug2 Logical flag indicated if additional debugging information is -#' produced. Normally the functions will halt at \code{browser()} calls to -#' allow the user to peek into the internal variables. Not useful except to -#' package developers. +#' @template tauU.alpha.beta +#' @template taueU.alpha.beta +#' @template prior.beta.logitP.mean.sd +#' @template tauP.alpha.beta +#' @template run.prob +#' @template debug #' @template InitialSeed #' @template save.output.to.files +#' @template trunc.logitP #' @return An MCMC object with samples from the posterior distribution. A #' series of graphs and text file are also created in the working directory. 
@@ -138,12 +121,13 @@ TimeStratPetersenDiagErrorWHChinook_fit<- run.prob=seq(0,1,.1), # what percentiles of run timing are wanted debug=FALSE, debug2=FALSE, InitialSeed=ceiling(stats::runif(1,min=0, max=1000000)), - save.output.to.files=TRUE) { + save.output.to.files=TRUE, + trunc.logitP=15) { # Fit a Time Stratified Petersen model with diagonal entries and with smoothing on U allowing for random error, # covariates for the the capture probabilities, and separating the wild vs hatchery fish # The "diagonal entries" implies that no marked fish are recaptured outside the (time) stratum of release # - version <- '2021-11-01' + version <- '2021-11-02' options(width=200) # Input parameters are @@ -666,18 +650,18 @@ if (debug2) { results.row.names <- rownames(results$summary) etaU.W.row.index <- grep("etaU.W", results.row.names) etaU.W <- results$summary[etaU.W.row.index,] - plot.df$logU.W =etaU.W[,"mean"] - plot.df$lcl.W =etaU.W[,"2.5%"] - plot.df$ucl.W =etaU.W[,"97.5%"] + plot.df$logU.W = etaU.W[,"mean"] + plot.df$logUlcl.W = etaU.W[,"2.5%"] + plot.df$logUucl.W = etaU.W[,"97.5%"] etaU.H.row.index <- grep("etaU.H", results.row.names) etaU.H <- results$summary[etaU.H.row.index,] - plot.df$logU.H =etaU.H[,"mean"] - plot.df$lcl.H =etaU.H[,"2.5%"] - plot.df$ucl.H =etaU.H[,"97.5%"] + plot.df$logU.H = etaU.H[,"mean"] + plot.df$logUlcl.H = etaU.H[,"2.5%"] + plot.df$logUucl.H = etaU.H[,"97.5%"] plot.df$logU.H [1:(hatch.after - min(time)+1)] <- NA # no hatchery fish until release at hatch.after - plot.df$lcl.H [1:(hatch.after - min(time)+1)] <- NA - plot.df$ucl.H [1:(hatch.after - min(time)+1)] <- NA + plot.df$logUlcl.H [1:(hatch.after - min(time)+1)] <- NA + plot.df$logUucl.H [1:(hatch.after - min(time)+1)] <- NA # extract the spline values for W (wild) and H (hatchery) fish logUne.W.row.index <- grep("logUne.W", results.row.names) @@ -686,18 +670,32 @@ if (debug2) { plot.df$spline.H <- results$summary[logUne.H.row.index,"mean"] plot.df$spline.H [1:(hatch.after - min(time)+1)] 
<- NA # no hatchery fish until release at hatch.after + # add limits to the plot to avoid non-monotone secondary axis problems with extreme values + plot.df$logUguess.W <- pmax(-10 , pmin(20, plot.df$logUguess.W)) + plot.df$logUguess.H <- pmax(-10 , pmin(20, plot.df$logUguess.H)) + plot.df$logU.W <- pmax(-10 , pmin(20, plot.df$logU.W )) + plot.df$logU.H <- pmax(-10 , pmin(20, plot.df$logU.H )) + plot.df$logUlcl.W <- pmax(-10 , pmin(20, plot.df$logUlcl.W )) + plot.df$logUlcl.H <- pmax(-10 , pmin(20, plot.df$logUlcl.H )) + plot.df$logUucl.W <- pmax(-10 , pmin(20, plot.df$logUucl.W )) + plot.df$logUucl.H <- pmax(-10 , pmin(20, plot.df$logUucl.H )) + plot.df$spline.W <- pmax(-10 , pmin(20, plot.df$spline.W)) + plot.df$spline.H <- pmax(-10 , pmin(20, plot.df$spline.H)) + + fit.plot <- ggplot(data=plot.df, aes_(x=~time))+ ggtitle(title, subtitle="Fitted spline curve to raw U.W[i] U.H[i] with 95% credible intervals")+ geom_point(aes_(y=~logUguess.W), color="red", shape="w")+ # guesses for wild file geom_point(aes_(y=~logUguess.H), color="green", shape="h")+ # guesses for hatchery fish - xlab("Time Index\nFitted/Smoothed/Raw values plotted for W(black) and H(blue)")+ylab("log(U[i]) + 95% credible interval")+ + xlab("Time Index\nFitted/Smoothed/Raw values plotted for W(black) and H(blue)")+ + ylab("log(U[i]) + 95% credible interval")+ geom_point(aes_(y=~logU.W), color="black", shape=19)+ geom_line (aes_(y=~logU.W), color="black")+ - geom_errorbar(aes_(ymin=~lcl.W, ymax=~ucl.W), width=.1)+ + geom_errorbar(aes_(ymin=~logUlcl.W, ymax=~logUucl.W), width=.1)+ geom_line(aes_(y=~spline.W),linetype="dashed") + geom_point(aes_(y=~logU.H), color="blue", shape=19)+ geom_line (aes_(y=~logU.H), color="blue")+ - geom_errorbar(aes_(ymin=~lcl.H, ymax=~ucl.H), width=.1, color="blue")+ + geom_errorbar(aes_(ymin=~logUlcl.H, ymax=~logUucl.H), width=.1, color="blue")+ geom_line(aes_(y=~spline.H),linetype="dashed",color="blue")+ ylim(c(-2,NA))+ scale_x_continuous(breaks=seq(min(plot.df$time, 
na.rm=TRUE),max(plot.df$time, na.rm=TRUE),2))+ @@ -716,7 +714,9 @@ if(save.output.to.files)ggsave(plot=fit.plot, filename=paste(prefix,"-fit.pdf",s results$plots$fit.plot <- fit.plot # Plot the logitP over time -logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2, u2=new.u2.A+new.u2.N, logitP.cov=new.logitP.cov, results=results) +logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2, u2=new.u2.A+new.u2.N, + logitP.cov=new.logitP.cov, results=results, + trunc.logitP=trunc.logitP) if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in", dpi=300) results$plots$logitP.plot <- logitP.plot diff --git a/R/TimeStratPetersenDiagErrorWHSteel_fit.R b/R/TimeStratPetersenDiagErrorWHSteel_fit.R index 86434fd..71fd4c6 100644 --- a/R/TimeStratPetersenDiagErrorWHSteel_fit.R +++ b/R/TimeStratPetersenDiagErrorWHSteel_fit.R @@ -1,3 +1,4 @@ +# 2021-10-24 CJS Added trunc.logitP to fix plotting problems with extreme values of logitP # 2020-12-15 CJS Remove sampfrac from code # 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP # 2018-12-19 CJS deprecated use of sampling fraction @@ -67,33 +68,15 @@ #' hatchery unmarked (but adipose fin clipped) age 1+ fish should be ignored. #' @template logitP.cov #' @template mcmc-parms -#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for -#' the prior for the variance of noise around the spline. -#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for -#' the prior for the variance of noise around the spline. 
-#' @param prior.beta.logitP.mean Mean of the prior normal distribution for -#' logit(catchability) across strata -#' @param prior.beta.logitP.sd SD of the prior normal distribution for -#' logit(catchability) across strata -#' @param tauP.alpha One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param tauP.beta One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param run.prob Numeric vector indicating percentiles of run timing should -#' be computed. -#' @param debug Logical flag indicating if a debugging run should be made. In -#' the debugging run, the number of samples in the posterior is reduced -#' considerably for a quick turn around. -#' @param debug2 Logical flag indicated if additional debugging information is -#' produced. Normally the functions will halt at \code{browser()} calls to -#' allow the user to peek into the internal variables. Not useful except to -#' package developers. +#' @template tauU.alpha.beta +#' @template taueU.alpha.beta +#' @template prior.beta.logitP.mean.sd +#' @template tauP.alpha.beta +#' @template run.prob +#' @template debug #' @template InitialSeed -#' @template save.output.to.files +#' @template save.output.to.files +#' @template trunc.logitP #' #' @return An MCMC object with samples from the posterior distribution. A #' series of graphs and text file are also created in the working directory. 
@@ -121,13 +104,14 @@ TimeStratPetersenDiagErrorWHSteel_fit <- run.prob=seq(0,1,.1), # what percentiles of run timing are wanted debug=FALSE, debug2=FALSE, InitialSeed=ceiling(stats::runif(1,min=0, 1000000)), - save.output.to.files=TRUE) { + save.output.to.files=TRUE, + trunc.logitP=15) { # Fit a Time Stratified Petersen model with diagonal entries and with smoothing on U allowing for random error, # covariates for the the capture probabilities, and separating the wild vs hatchery fish for STEELHEAD releases # The steelhead are nice because 100% of hatchery fish are adipose fin clipped and no wild fish are adipose fin clipped # The "diagonal entries" implies that no marked fish are recaptured outside the (time) stratum of release # - version <- '2021-11-01' + version <- '2021-11-02' options(width=200) # Input parameters are @@ -223,7 +207,7 @@ if(!all(bad.u2.W.1 %in% time, na.rm=TRUE)){ if(!all(bad.u2.H.1 %in% time, na.rm=TRUE)){ cat("***** ERROR ***** bad.u2.H.1 must be elements of strata identifiers. You entered \n bad.u2.H.1:", paste(bad.u2.H.1,collapse=","),"\n Strata identifiers are \n time:", - paste(time, ,collapse=","), "\n") + paste(time ,collapse=","), "\n") return()} if(!all(hatch.after %in% time, na.rm=TRUE)){ cat("***** ERROR ***** hatch.after must be elements of strata identifiers. 
You entered \n hatch.after:", @@ -621,6 +605,14 @@ if (debug) plot.data <-rbind( get.est("H.1" ,plot.df, hatch.after, results), get.est("W.YoY",plot.df, hatch.after, results), get.est("W.1" ,plot.df, hatch.after, results)) + + # add limits to the plot to avoid non-monotone secondary axis problems with extreme values + plot.data$logUguess <- pmax(-10 , pmin(20, plot.data$logUguess)) + plot.data$logU <- pmax(-10 , pmin(20, plot.data$logU )) + plot.data$logUlcl <- pmax(-10 , pmin(20, plot.data$logUlcl )) + plot.data$logUucl <- pmax(-10 , pmin(20, plot.data$logUucl )) + plot.data$spline <- pmax(-10 , pmin(20, plot.data$spline)) + fit.plot <- ggplot(data=plot.data, aes_(x=~time, color=~group))+ ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals")+ geom_point(aes_(y=~logUguess), shape=16, position=position_dodge(width=.2))+ # guesses for population @@ -650,7 +642,8 @@ results$plots$fit.plot <- fit.plot # plot the estimated logits over time logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2, - u2=new.u2.W.YoY+new.u2.W.1+new.u2.H.1, logitP.cov=new.logitP.cov, results=results) + u2=new.u2.W.YoY+new.u2.W.1+new.u2.H.1, logitP.cov=new.logitP.cov, results=results, + trunc.logitP=trunc.logitP) if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in") results$plots$logitP.plot <- logitP.plot diff --git a/R/TimeStratPetersenDiagError_fit.R b/R/TimeStratPetersenDiagError_fit.R index 645693a..264c340 100644 --- a/R/TimeStratPetersenDiagError_fit.R +++ b/R/TimeStratPetersenDiagError_fit.R @@ -1,3 +1,4 @@ +# 2021-10-24 CJS added trunc.logitP to avoid problems with plotting # 2020-12-15 CJS removed sampfrac from code but left argument with warning message # 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP # 2018-12-19 CJS Deprecated use of sampling.fraction @@ -47,15 +48,9 @@ #' @param m2 A numeric vector of the number of marked fish from n1 
that are #' recaptured in each time stratum. All recaptures take place within the #' stratum of release. -#' @param u2 A numeric vector of the number of unmarked fish captured in each -#' stratum. These will be expanded by the capture efficiency to estimate the -#' population size in each stratum. +#' @template u2.D #' @template sampfrac -#' @param jump.after A numeric vector with elements belonging to \code{time}. -#' In some cases, the spline fitting the population numbers should be allowed -#' to jump. For example, the population size could take a jump when hatchery -#' released fish suddenly arrive at the trap. The jumps occur AFTER the strata -#' listed in this argument. +#' @template jump.after #' @template bad.n1 #' @template bad.m2 #' @template bad.u2 @@ -71,33 +66,15 @@ #' as -50 because numerical problems could occur in WinBugs/OpenBugs. #' @template mcmc-parms -#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for -#' the prior for the variance of noise around the spline. -#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for -#' the prior for the variance of noise around the spline. 
-#' @param prior.beta.logitP.mean Mean of the prior normal distribution for -#' logit(catchability) across strata -#' @param prior.beta.logitP.sd SD of the prior normal distribution for -#' logit(catchability) across strata -#' @param tauP.alpha One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param tauP.beta One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param run.prob Numeric vector indicating percentiles of run timing should -#' be computed. -#' @param debug Logical flag indicating if a debugging run should be made. In -#' the debugging run, the number of samples in the posterior is reduced -#' considerably for a quick turn around. -#' @param debug2 Logical flag indicated if additional debugging information is -#' produced. Normally the functions will halt at \code{browser()} calls to -#' allow the user to peek into the internal variables. Not useful except to -#' package developers. +#' @template tauU.alpha.beta +#' @template taueU.alpha.beta +#' @template prior.beta.logitP.mean.sd +#' @template tauP.alpha.beta +#' @template run.prob +#' @template debug #' @template InitialSeed #' @template save.output.to.files +#' @template trunc.logitP #' #' @return An MCMC object with samples from the posterior distribution. 
A @@ -125,12 +102,13 @@ TimeStratPetersenDiagError_fit <- run.prob=seq(0,1,.1), # what percentiles of run timing are wanted debug=FALSE, debug2=FALSE, InitialSeed=ceiling(stats::runif(1,min=0, max=1000000)), - save.output.to.files=TRUE) { + save.output.to.files=TRUE, + trunc.logitP=15) { # Fit a Time Stratified Petersen model with diagonal entries and with smoothing on U allowing for random error # The "diagonal entries" implies that no marked fish are recaptured outside the (time) stratum of release # - version <- '2021-11-01' + version <- '2021-11-02' options(width=200) # Input parameters are @@ -471,9 +449,9 @@ if (debug) results.row.names <- rownames(results$summary) etaU.row.index <- grep("etaU", results.row.names) etaU<- results$summary[etaU.row.index,] - plot.df$logU =etaU[,"mean"] - plot.df$lcl =etaU[,"2.5%"] - plot.df$ucl =etaU[,"97.5%"] + plot.df$logU = etaU[,"mean"] + plot.df$logUlcl = etaU[,"2.5%"] + plot.df$logUucl = etaU[,"97.5%"] # extract the spline values logUne.row.index <- grep("logUne", results.row.names) @@ -481,13 +459,20 @@ if (debug) plot.df$spline <- results$summary[logUne.row.index,"mean"] #browser() +# add limits to the plot to avoid non-monotone secondary axis problems with extreme values + plot.df$logUi <- pmax(-10 , pmin(20, plot.df$logUi)) + plot.df$logU <- pmax(-10 , pmin(20, plot.df$logU )) + plot.df$logUlcl <- pmax(-10 , pmin(20, plot.df$logUlcl )) + plot.df$logUucl <- pmax(-10 , pmin(20, plot.df$logUucl )) + plot.df$spline <- pmax(-10 , pmin(20, plot.df$spline)) + fit.plot <- ggplot(data=plot.df, aes_(x=~time))+ ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals for estimated log(U[i])")+ geom_point(aes_(y=~logUi), color="red", shape=1)+ # open circle xlab("Time Index\nOpen/closed circles - initial and final estimates")+ylab("log(U[i]) + 95% credible interval")+ geom_point(aes_(y=~logU), color="black", shape=19)+ geom_line (aes_(y=~logU), color="black")+ - geom_errorbar(aes_(ymin=~lcl, ymax=~ucl), width=.1)+ 
+ geom_errorbar(aes_(ymin=~logUlcl, ymax=~logUucl), width=.1)+ geom_line(aes_(y=~spline),linetype="dashed")+ scale_x_continuous(breaks=seq(min(plot.df$time, na.rm=TRUE),max(plot.df$time, na.rm=TRUE),2))+ scale_y_continuous(sec.axis = sec_axis(~ exp(.), name="U + 95% credible interval", @@ -504,7 +489,9 @@ if(save.output.to.files)ggsave(plot=fit.plot, filename=paste(prefix,"-fit.pdf",s results$plots$fit.plot <- fit.plot # plot logit(P) over time -logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2, u2=new.u2, logitP.cov=new.logitP.cov, results=results) +logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2, u2=new.u2, + logitP.cov=new.logitP.cov, results=results, + trunc.logitP=trunc.logitP) if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in") results$plots$logitP.plot <- logitP.plot diff --git a/R/TimeStratPetersenNonDiagErrorNPMarkAvail_fit.R b/R/TimeStratPetersenNonDiagErrorNPMarkAvail_fit.R index f4405f6..a510bc3 100644 --- a/R/TimeStratPetersenNonDiagErrorNPMarkAvail_fit.R +++ b/R/TimeStratPetersenNonDiagErrorNPMarkAvail_fit.R @@ -1,3 +1,4 @@ +## 2021-10-23 CJS Added trunc.logitP to deal with extreme values of logitP during plotting ## 2020-12-15 CJS removed sampfrac from body of code ## 2020-12-15 CJS Fixed problem when specifyin u2==NA ## 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP @@ -48,15 +49,9 @@ #' \code{\link{TimeStratPetersenDiagError_fit}} function for cases where #' recaptures take place ONLY in the stratum of release, i.e. the diagonal #' case. -#' @param u2 A numeric vector of the number of unmarked fish captured in each -#' stratum. These will be expanded by the capture efficiency to estimate the -#' population size in each stratum. 
The length of u2 should be between the length of n1 and length n1 + number of columns in m2 -1 +#' @template u2.ND #' @template sampfrac -#' @param jump.after A numeric vector with elements belonging to \code{time}. -#' In some cases, the spline fitting the population numbers should be allowed -#' to jump. For example, the population size could take a jump when hatchery -#' released fish suddenly arrive at the trap. The jumps occur AFTER the strata -#' listed in this argument. +#' @template jump.after #' @template bad.n1 #' @template bad.m2 #' @template bad.u2 @@ -77,39 +72,17 @@ #' are available and 39\% have dropped out or fallen back. #' @param marked_available_x See marked_available_n #' @template mcmc-parms -#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for -#' the prior for the variance of noise around the spline. -#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for -#' the prior for the variance of noise around the spline. -#' @param Delta.max Maximum transition time for marked fish, i.e. 
all fish -#' assumed to have moved by Delta.max unit of time -#' @param tauTT.alpha One of the parameters along with \code{tauTT.beta} for -#' the prior on 1/var of logit continuation ratio for travel times -#' @param tauTT.beta One of the parameters along with \code{tauTT.alpha} for -#' the prior on 1/var of logit continuation ratio for travel times -#' @param prior.beta.logitP.mean Mean of the prior normal distribution for -#' logit(catchability) across strata -#' @param prior.beta.logitP.sd SD of the prior normal distribution for -#' logit(catchability) across strata -#' @param tauP.alpha One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param tauP.beta One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param run.prob Numeric vector indicating percentiles of run timing should -#' be computed. -#' @param debug Logical flag indicating if a debugging run should be made. In -#' the debugging run, the number of samples in the posterior is reduced -#' considerably for a quick turn around. -#' @param debug2 Logical flag indicated if additional debugging information is -#' produced. Normally the functions will halt at \code{browser()} calls to -#' allow the user to peek into the internal variables. Not useful except to -#' package developers. +#' @template tauU.alpha.beta +#' @template taueU.alpha.beta +#' @template Delta.max +#' @template tauTT.alpha.beta +#' @template prior.beta.logitP.mean.sd +#' @template tauP.alpha.beta +#' @template run.prob +#' @template debug #' @template InitialSeed #' @template save.output.to.files +#' @template trunc.logitP #' #' @return An MCMC object with samples from the posterior distribution. A #' series of graphs and text file are also created in the working directory. 
@@ -141,7 +114,8 @@ TimeStratPetersenNonDiagErrorNPMarkAvail_fit<- function( title="TSPNDENP-avail", run.prob=seq(0,1,.1), # what percentiles of run timing are wanted debug=FALSE, debug2=FALSE, InitialSeed=ceiling(stats::runif(1,min=0, max=1000000)), - save.output.to.files=TRUE) { + save.output.to.files=TRUE, + trunc.logitP=15) { ## Fit a Time Stratified Petersen model with NON-diagonal entries and with smoothing on U allowing for random error ## and fall back after tagging. This is based on the Skeena River study, where only 40/66 (60%) acoustically tagged fish ## were observed above the canyon spot and hence 40% of tagged fish never migrated forward of their tagging release spot. @@ -152,7 +126,7 @@ TimeStratPetersenNonDiagErrorNPMarkAvail_fit<- function( title="TSPNDENP-avail", ## strata later. Transisions of marked fish are modelled non-parametrically. ## - version <- '2021-11-01' + version <- '2021-11-02' options(width=200) ## Input parameters are @@ -564,15 +538,24 @@ if(length(prior.beta.logitP.mean) != ncol(logitP.cov) | length(prior.beta.logitP results.row.names <- rownames(results$summary) etaU.row.index <- grep("etaU", results.row.names) etaU<- results$summary[etaU.row.index,] - plot.df$logU =etaU[,"mean"] - plot.df$lcl =etaU[,"2.5%"] - plot.df$ucl =etaU[,"97.5%"] + plot.df$logU = etaU[,"mean"] + plot.df$logUlcl = etaU[,"2.5%"] + plot.df$logUucl = etaU[,"97.5%"] # extract the spline values logUne.row.index <- grep("logUne", results.row.names) logUne<- results$summary[logUne.row.index,"mean"] plot.df$spline <- results$summary[logUne.row.index,"mean"] + + # add limits to the plot to avoid non-monotone secondary axis problems with extreme values + plot.df$logUi <- pmax(-10 , pmin(20, plot.df$logUi)) + plot.df$logU <- pmax(-10 , pmin(20, plot.df$logU )) + plot.df$logUlcl <- pmax(-10 , pmin(20, plot.df$logUlcl )) + plot.df$logUucl <- pmax(-10 , pmin(20, plot.df$logUucl )) + plot.df$spline <- pmax(-10 , pmin(20, plot.df$spline)) + + fit.plot <- 
ggplot(data=plot.df, aes_(x=~time))+ ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals for estimated log(U[i])")+ geom_point(aes_(y=~logUi), color="red", shape=1)+ # open circle @@ -580,7 +563,7 @@ if(length(prior.beta.logitP.mean) != ncol(logitP.cov) | length(prior.beta.logitP ylab("log(U[i]) + 95% credible interval")+ geom_point(aes_(y=~logU), color="black", shape=19)+ geom_line (aes_(y=~logU), color="black")+ - geom_errorbar(aes_(ymin=~lcl, ymax=~ucl), width=.1)+ + geom_errorbar(aes_(ymin=~logUlcl, ymax=~logUucl), width=.1)+ geom_line(aes_(y=~spline),linetype="dashed")+ scale_x_continuous(breaks=seq(min(plot.df$time,na.rm=TRUE),max(plot.df$time, na.rm=TRUE),2))+ scale_y_continuous(sec.axis = sec_axis(~ exp(.), name="U + 95% credible interval", @@ -599,7 +582,9 @@ if(length(prior.beta.logitP.mean) != ncol(logitP.cov) | length(prior.beta.logitP ## acf plot - logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2, logitP.cov=new.logitP.cov, results=results) + logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2, + logitP.cov=new.logitP.cov, results=results, + trunc.logitP=trunc.logitP) if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in") results$plots$logitP.plot <- logitP.plot diff --git a/R/TimeStratPetersenNonDiagErrorNP_fit.R b/R/TimeStratPetersenNonDiagErrorNP_fit.R index 12c140d..cf2a29f 100644 --- a/R/TimeStratPetersenNonDiagErrorNP_fit.R +++ b/R/TimeStratPetersenNonDiagErrorNP_fit.R @@ -1,3 +1,4 @@ +## 2021-10-23 CJS Added trunc.logitP to avoid plotting problems with extreme values of logitP ## 2020-12-15 CJS Removed sampfrac from code ## 2020-12-15 CJS Fixed problems when u2 is set to missing ## 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP @@ -58,15 +59,9 @@ #' \code{\link{TimeStratPetersenDiagError_fit}} function for cases where #' recaptures take place 
ONLY in the stratum of release, i.e. the diagonal #' case. -#' @param u2 A numeric vector of the number of unmarked fish captured in each -#' stratum. These will be expanded by the capture efficiency to estimate the -#' population size in each stratum. The length of u2 should be between the length of n1 and length n1 + number of columns in m2 -1 +#' @template u2.ND #' @template sampfrac -#' @param jump.after A numeric vector with elements belonging to \code{time}. -#' In some cases, the spline fitting the population numbers should be allowed -#' to jump. For example, the population size could take a jump when hatchery -#' released fish suddenly arrive at the trap. The jumps occur AFTER the strata -#' listed in this argument. +#' @template jump.after #' @template bad.n1 #' @template bad.m2 #' @template bad.u2 @@ -81,24 +76,11 @@ #' which on the logit scale gives p[i] essentially 0. Don't specify values such #' as -50 because numerical problems could occur in JAGS. #' @template mcmc-parms -#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for -#' the prior for the variance of noise around the spline. -#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for -#' the prior for the variance of noise around the spline. -#' @param Delta.max Maximum transition time for marked fish, i.e. 
all fish -#' assumed to have moved by Delta.max unit of time -#' @param tauTT.alpha One of the parameters along with \code{tauTT.beta} for -#' the prior on 1/var of logit continuation ratio for travel times -#' @param tauTT.beta One of the parameters along with \code{tauTT.alpha} for -#' the prior on 1/var of logit continuation ratio for travel times -#' @param prior.beta.logitP.mean Mean of the prior normal distribution for -#' logit(catchability) across strata -#' @param prior.beta.logitP.sd SD of the prior normal distribution for -#' logit(catchability) across strata +#' @template tauU.alpha.beta +#' @template taueU.alpha.beta +#' @template Delta.max +#' @template tauTT.alpha.beta +#' @template prior.beta.logitP.mean.sd #' @param prior.muTT - prior for movement rates. #' These are like a Dirchelet type prior #' where x are values representing belief in the travel times. @@ -108,21 +90,12 @@ #' 4/10=.4 of the animals taking 1 stratum to move etc #' So if x=c(10,40,30,20), this represent the same movement pattern #' but a strong degree of belief -#' @param tauP.alpha One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param tauP.beta One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param run.prob Numeric vector indicating percentiles of run timing should -#' be computed. -#' @param debug Logical flag indicating if a debugging run should be made. In -#' the debugging run, the number of samples in the posterior is reduced -#' considerably for a quick turn around. -#' @param debug2 Logical flag indicated if additional debugging information is -#' produced. Normally the functions will halt at \code{browser()} calls to -#' allow the user to peek into the internal variables. Not useful except to -#' package developers. 
+#' @template tauP.alpha.beta +#' @template run.prob +#' @template debug #' @template InitialSeed #' @template save.output.to.files +#' @template trunc.logitP #' #' @return An MCMC object with samples from the posterior distribution. A #' series of graphs and text file are also created in the working directory. @@ -153,13 +126,14 @@ TimeStratPetersenNonDiagErrorNP_fit<- function( title="TSPNDENP", prefix="TSPNDE run.prob=seq(0,1,.1), # what percentiles of run timing are wanted debug=FALSE, debug2=FALSE, InitialSeed=ceiling(stats::runif(1,min=0,1000000)), - save.output.to.files=TRUE) { + save.output.to.files=TRUE, + trunc.logitP=15) { ## Fit a Time Stratified Petersen model with NON-diagonal entries and with smoothing on U allowing for random error ## This is the classical stratified Petersen model where the recoveries can take place for this and multiple ## strata later. Transisions of marked fish are modelled non-parametrically. ## - version <- '2021-11-01' + version <- '2021-11-02' options(width=200) ## Input parameters are @@ -583,15 +557,22 @@ if(length(prior.beta.logitP.mean) != ncol(logitP.cov) | length(prior.beta.logitP results.row.names <- rownames(results$summary) etaU.row.index <- grep("etaU", results.row.names) etaU<- results$summary[etaU.row.index,] - plot.df$logU =etaU[,"mean"] - plot.df$lcl =etaU[,"2.5%"] - plot.df$ucl =etaU[,"97.5%"] + plot.df$logU =etaU[,"mean"] + plot.df$logUlcl =etaU[,"2.5%"] + plot.df$logUucl =etaU[,"97.5%"] # extract the spline values logUne.row.index <- grep("logUne", results.row.names) logUne<- results$summary[logUne.row.index,"mean"] plot.df$spline <- results$summary[logUne.row.index,"mean"] + # add limits to the plot to avoid non-monotone secondary axis problems with extreme values + plot.df$logUi <- pmax(-10 , pmin(20, plot.df$logUi)) + plot.df$logU <- pmax(-10 , pmin(20, plot.df$logU )) + plot.df$logUlcl <- pmax(-10 , pmin(20, plot.df$logUlcl )) + plot.df$logUucl <- pmax(-10 , pmin(20, plot.df$logUucl )) + plot.df$spline 
<- pmax(-10 , pmin(20, plot.df$spline)) + fit.plot <- ggplot(data=plot.df, aes_(x=~time))+ ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals for estimated log(U[i])")+ geom_point(aes_(y=~logUi), color="red", shape=1)+ # open circle @@ -599,7 +580,7 @@ if(length(prior.beta.logitP.mean) != ncol(logitP.cov) | length(prior.beta.logitP ylab("log(U[i]) + 95% credible interval")+ geom_point(aes_(y=~logU), color="black", shape=19)+ geom_line (aes_(y=~logU), color="black")+ - geom_errorbar(aes_(ymin=~lcl, ymax=~ucl), width=.1)+ + geom_errorbar(aes_(ymin=~logUlcl, ymax=~logUucl), width=.1)+ geom_line(aes_(y=~spline),linetype="dashed")+ scale_x_continuous(breaks=seq(min(plot.df$time, na.rm=TRUE),max(plot.df$time, na.rm=TRUE),2))+ scale_y_continuous(sec.axis = sec_axis(~ exp(.), name="U + 95% credible interval", @@ -618,7 +599,9 @@ if(length(prior.beta.logitP.mean) != ncol(logitP.cov) | length(prior.beta.logitP ## plot the logitP over time - logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2, logitP.cov=new.logitP.cov, results=results) + logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2, + logitP.cov=new.logitP.cov, results=results, + trunc.logitP=trunc.logitP) if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in") results$plots$logitP.plot <- logitP.plot diff --git a/R/TimeStratPetersenNonDiagError_fit.R b/R/TimeStratPetersenNonDiagError_fit.R index 478e82c..d1d0a24 100644 --- a/R/TimeStratPetersenNonDiagError_fit.R +++ b/R/TimeStratPetersenNonDiagError_fit.R @@ -1,3 +1,4 @@ +# 2021-10-23 CJS Added trunc.logitP to deal with extreme values of logitP during plotting # 2020-12-15 CJS Removed sampfrac from code # 2020-12-14 CJS All bad.n1 or bad.m2 are set to 0. 
No NA allows for n1 or m2 # Fixed problem with some u2=NA in the GOF computations and plots @@ -63,15 +64,9 @@ #' \code{\link{TimeStratPetersenDiagError_fit}} function for cases where #' recaptures take place ONLY in the stratum of release, i.e. the diagonal #' case. -#' @param u2 A numeric vector of the number of unmarked fish captured in each -#' stratum. These will be expanded by the capture efficiency to estimate the -#' population size in each stratum. The length of u2 should be between the length of n1 and length n1 + number of columns in m2 -1 +#' @template u2.ND #' @template sampfrac -#' @param jump.after A numeric vector with elements belonging to \code{time}. -#' In some cases, the spline fitting the population numbers should be allowed -#' to jump. For example, the population size could take a jump when hatchery -#' released fish suddenly arrive at the trap. The jumps occur AFTER the strata -#' listed in this argument. +#' @template jump.after #' @template bad.n1 #' @template bad.m2 #' @template bad.u2 @@ -86,34 +81,16 @@ #' which on the logit scale gives p[i] essentially 0. Don't specify values such #' as -50 because numerical problems could occur in JAGS. #' @template mcmc-parms -#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the -#' prior for the variance of the random noise for the smoothing spline. -#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for -#' the prior for the variance of noise around the spline. -#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for -#' the prior for the variance of noise around the spline. 
-#' @param prior.beta.logitP.mean Mean of the prior normal distribution for -#' logit(catchability) across strata -#' @param prior.beta.logitP.sd SD of the prior normal distribution for -#' logit(catchability) across strata -#' @param tauP.alpha One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param tauP.beta One of the parameters for the prior for the variance in -#' logit(catchability) among strata -#' @param run.prob Numeric vector indicating percentiles of run timing should -#' be computed. -#' @param debug Logical flag indicating if a debugging run should be made. In -#' the debugging run, the number of samples in the posterior is reduced -#' considerably for a quick turn around. -#' @param debug2 Logical flag indicated if additional debugging information is -#' produced. Normally the functions will halt at \code{browser()} calls to -#' allow the user to peek into the internal variables. Not useful except to -#' package developers. +#' @template tauU.alpha.beta +#' @template taueU.alpha.beta +#' @template prior.beta.logitP.mean.sd +#' @template tauP.alpha.beta +#' @template run.prob +#' @template debug #' @template InitialSeed #' @template save.output.to.files -#' +#' @template trunc.logitP + #' @return An MCMC object with samples from the posterior distribution. A #' series of graphs and text file are also created in the working directory. 
#' @template author @@ -143,12 +120,13 @@ TimeStratPetersenNonDiagError_fit <- run.prob=seq(0,1,.1), # what percentiles of run timing are wanted debug=FALSE, debug2=FALSE, InitialSeed=ceiling(stats::runif(1,min=0,1000000)), - save.output.to.files=TRUE) { + save.output.to.files=TRUE, + trunc.logitP=15) { # Fit a Time Stratified Petersen model with NON-diagonal entries and with smoothing on U allowing for random error # This is the classical stratified Petersen model where the recoveries can take place for this and multiple # strata later # - version <- '2021-11-01' + version <- '2021-11-02' options(width=200) # Input parameters are @@ -462,7 +440,7 @@ else #notice R syntax requires { before the else Nstrata.rel <- length(n1) Nstrata.cap <- ncol(expanded.m2) -1 # don't forget that last column of m2 is number of fish never seen -# A plot of the observered log(U) on the log scale, and the final mean log(U) +# A plot of the observed log(U) on the log scale, and the final mean log(U) plot.df <- data.frame(time =new.time) plot.df$logUi <-log( c((new.u2[1:Nstrata.rel]+1)*(new.n1+2)/(apply(expanded.m2[,1:Nstrata.cap],1,sum)+1), rep(NA, length(u2)-Nstrata.rel))) @@ -470,15 +448,22 @@ plot.df$logUi <-log( c((new.u2[1:Nstrata.rel]+1)*(new.n1+2)/(apply(expanded.m2[, results.row.names <- rownames(results$summary) etaU.row.index <- grep("etaU", results.row.names) etaU<- results$summary[etaU.row.index,] -plot.df$logU =etaU[,"mean"] -plot.df$lcl =etaU[,"2.5%"] -plot.df$ucl =etaU[,"97.5%"] +plot.df$logU =etaU[,"mean"] +plot.df$logUlcl =etaU[,"2.5%"] +plot.df$logUucl =etaU[,"97.5%"] # extract the spline values logUne.row.index <- grep("logUne", results.row.names) logUne<- results$summary[logUne.row.index,"mean"] plot.df$spline <- results$summary[logUne.row.index,"mean"] +# add limits to the plot to avoid non-monotone secondary axis problems with extreme values + plot.df$logUi <- pmax(-10 , pmin(20, plot.df$logUi)) + plot.df$logU <- pmax(-10 , pmin(20, plot.df$logU )) + 
plot.df$logUlcl <- pmax(-10 , pmin(20, plot.df$logUlcl )) + plot.df$logUucl <- pmax(-10 , pmin(20, plot.df$logUucl )) + plot.df$spline <- pmax(-10 , pmin(20, plot.df$spline)) + fit.plot <- ggplot(data=plot.df, aes_(x=~time))+ ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals for estimated log(U[i])")+ geom_point(aes_(y=~logUi), color="red", shape=1)+ # open circle @@ -486,7 +471,7 @@ fit.plot <- ggplot(data=plot.df, aes_(x=~time))+ ylab("log(U[i]) + 95% credible interval")+ geom_point(aes_(y=~logU), color="black", shape=19)+ geom_line (aes_(y=~logU), color="black")+ - geom_errorbar(aes_(ymin=~lcl, ymax=~ucl), width=.1)+ + geom_errorbar(aes_(ymin=~logUlcl, ymax=~logUucl), width=.1)+ geom_line(aes_(y=~spline),linetype="dashed")+ scale_x_continuous(breaks=seq(min(plot.df$time, na.rm=TRUE),max(plot.df$time, na.rm=TRUE),2))+ scale_y_continuous(sec.axis = sec_axis(~ exp(.), name="U + 95% credible interval", @@ -505,7 +490,9 @@ results$plots$fit.plot <- fit.plot # plot of the logit(p) over time -logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2, logitP.cov=new.logitP.cov, results=results) +logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=expanded.m2, u2=new.u2, + logitP.cov=new.logitP.cov, results=results, + trunc.logitP=trunc.logitP) if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in") results$plots$logitP.plot <- logitP.plot diff --git a/R/plot_logit.R b/R/plot_logit.R index 219b516..7837206 100644 --- a/R/plot_logit.R +++ b/R/plot_logit.R @@ -1,3 +1,4 @@ +# 2021-10-24 CJS Truncate the logitP to avoid problems with plotting # 2014-09-01 CJS First edition of this function # Take the input values and create a ggplot object for the logitP's with the credible intervals plotted # Input are the usual data values along with the MCMC results @@ -6,9 +7,9 @@ #' @import ggplot2 plyr -plot_logitP <- function(title, time, n1, 
m2, u2, logitP.cov, results){ +plot_logitP <- function(title, time, n1, m2, u2, logitP.cov, results, trunc.logitP=15){ # Plot the observed and fitted logit(p) values along with posterior limits - # n1, m2, u2 are the raw data (u2 has been adjusted upward for sampling fraction < 1 prior to call) + # n1, m2, u2 are the raw data # logitP.cov is the covariate matrix for modelling the logit(P)'s # results is the summary table from JAGS # @@ -45,6 +46,11 @@ plot_logitP <- function(title, time, n1, m2, u2, logitP.cov, results){ logitP.res$lcl <- logitP.res[, "2.5%"] logitP.res$ucl <- logitP.res[,"97.5%"] + # apply limits to the points for plotting purposes + logitP.res$lcl <- pmax(-trunc.logitP, pmin(trunc.logitP, logitP.res$lcl)) + logitP.res$ucl <- pmax(-trunc.logitP, pmin(trunc.logitP, logitP.res$ucl)) + logitP.res$mean <- pmax(-trunc.logitP, pmin(trunc.logitP, logitP.res$mean)) + #browser() myplot <- ggplot(data=logitP.res, aes(x=time, y=mean))+ ggtitle( paste(title,"\nPlot of logit(p[i]) with 95% credible intervals"))+ xlab(xtitle)+ylab("logit(p) + 95% credible interval")+ @@ -56,7 +62,7 @@ plot_logitP <- function(title, time, n1, m2, u2, logitP.cov, results){ # If this is a non-diagonal case, also plot the raw logits if(!is.matrix(m2)){ - raw_logitP <- logit((m2+1)/(n1+2)) + raw_logitP <- pmax(-trunc.logitP, pmin(trunc.logitP,logit((m2+1)/(n1+2)))) myplot <- myplot + annotate("point", x=time, y=raw_logitP, shape=1) } # based on raw data @@ -66,9 +72,9 @@ plot_logitP <- function(title, time, n1, m2, u2, logitP.cov, results){ # the intercept in most models with covariates along with 95% credible interval intercept.row.index <- grep("beta.logitP[1]", results.row.names, fixed=TRUE) intercept <- results$summary[intercept.row.index,] - mean<- intercept["mean"] - lcl <- intercept["2.5%"] - ucl <- intercept["97.5%"] + mean<- pmax(-trunc.logitP, pmin(trunc.logitP, intercept["mean"] )) + lcl <- pmax(-trunc.logitP, pmin(trunc.logitP, intercept["2.5%"] )) + ucl <- 
pmax(-trunc.logitP, pmin(trunc.logitP, intercept["97.5%"])) myplot <- myplot + geom_hline(yintercept=mean)+ @@ -78,8 +84,8 @@ plot_logitP <- function(title, time, n1, m2, u2, logitP.cov, results){ # plot the posterior "95% range" for the logit(P)'s based on N(xip, sigmaP^2) sigmaP.row.index <- grep("sigmaP", results.row.names) sigmaP <- results$summary[sigmaP.row.index,] - lcl <- intercept["mean"]-2*sigmaP["mean"] - ucl <- intercept["mean"]+2*sigmaP["mean"] + lcl <- pmax(-trunc.logitP, pmin(trunc.logitP,intercept["mean"]-2*sigmaP["mean"])) + ucl <- pmax(-trunc.logitP, pmin(trunc.logitP,intercept["mean"]+2*sigmaP["mean"])) myplot <- myplot + geom_hline(yintercept=lcl, linetype=3)+ geom_hline(yintercept=ucl, linetype=3) diff --git a/cran-comments.md b/cran-comments.md index 2c40b45..a43cdf0 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,9 +1,8 @@ ## Major change -* Fixed issue with reverse dependency on actuar package with sd() and var() functions as -notified by R team. - +* Fixed issue with error in ggplot when plotting logitP with extreme value and secondary axis is no longer +monotonic (issue #30) ## Test environments * local OS X install, R 4.1.1 diff --git a/man-roxygen/Delta.max.R b/man-roxygen/Delta.max.R new file mode 100644 index 0000000..7f03c76 --- /dev/null +++ b/man-roxygen/Delta.max.R @@ -0,0 +1,2 @@ +#' @param Delta.max Maximum transition time for marked fish, i.e. all fish +#' assumed to have moved by Delta.max unit of time diff --git a/man-roxygen/debug.R b/man-roxygen/debug.R new file mode 100644 index 0000000..8c8d5be --- /dev/null +++ b/man-roxygen/debug.R @@ -0,0 +1,7 @@ +#' @param debug Logical flag indicating if a debugging run should be made. In +#' the debugging run, the number of samples in the posterior is reduced +#' considerably for a quick turn around. +#' @param debug2 Logical flag indicated if additional debugging information is +#' produced. 
Normally the functions will halt at \code{browser()} calls to +#' allow the user to peek into the internal variables. Not useful except to +#' package developers. diff --git a/man-roxygen/jump.after.R b/man-roxygen/jump.after.R new file mode 100644 index 0000000..16a9e7e --- /dev/null +++ b/man-roxygen/jump.after.R @@ -0,0 +1,6 @@ +#' @param jump.after A numeric vector with elements belonging to \code{time}. +#' In some cases, the spline fitting the population numbers should be allowed +#' to jump. For example, the population size could take a jump when hatchery +#' released fish suddenly arrive at the trap. The jumps occur AFTER the strata +#' listed in this argument. +#' diff --git a/man-roxygen/prior.beta.logitP.mean.sd.R b/man-roxygen/prior.beta.logitP.mean.sd.R new file mode 100644 index 0000000..4f3fa09 --- /dev/null +++ b/man-roxygen/prior.beta.logitP.mean.sd.R @@ -0,0 +1,4 @@ +#' @param prior.beta.logitP.mean Mean of the prior normal distribution for +#' logit(catchability) across strata +#' @param prior.beta.logitP.sd SD of the prior normal distribution for +#' logit(catchability) across strata diff --git a/man-roxygen/run.prob.R b/man-roxygen/run.prob.R new file mode 100644 index 0000000..0a17d06 --- /dev/null +++ b/man-roxygen/run.prob.R @@ -0,0 +1,3 @@ +#' @param run.prob Numeric vector indicating percentiles of run timing should +#' be computed. 
+#' diff --git a/man-roxygen/tauP.alpha.beta.R b/man-roxygen/tauP.alpha.beta.R new file mode 100644 index 0000000..93b6669 --- /dev/null +++ b/man-roxygen/tauP.alpha.beta.R @@ -0,0 +1,5 @@ +#' @param tauP.alpha One of the parameters for the prior for the variance in +#' logit(catchability) among strata +#' @param tauP.beta One of the parameters for the prior for the variance in +#' logit(catchability) among strata +#' diff --git a/man-roxygen/tauTT.alpha.beta.R b/man-roxygen/tauTT.alpha.beta.R new file mode 100644 index 0000000..1efb014 --- /dev/null +++ b/man-roxygen/tauTT.alpha.beta.R @@ -0,0 +1,5 @@ +#' @param tauTT.alpha One of the parameters along with \code{tauTT.beta} for +#' the prior on 1/var of logit continuation ratio for travel times +#' @param tauTT.beta One of the parameters along with \code{tauTT.alpha} for +#' the prior on 1/var of logit continuation ratio for travel times +#' diff --git a/man-roxygen/tauU.alpha.beta.R b/man-roxygen/tauU.alpha.beta.R new file mode 100644 index 0000000..3395de9 --- /dev/null +++ b/man-roxygen/tauU.alpha.beta.R @@ -0,0 +1,5 @@ +#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the +#' prior for the variance of the random noise for the smoothing spline. +#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the +#' prior for the variance of the random noise for the smoothing spline. +#' diff --git a/man-roxygen/taueU.alpha.beta.R b/man-roxygen/taueU.alpha.beta.R new file mode 100644 index 0000000..b890720 --- /dev/null +++ b/man-roxygen/taueU.alpha.beta.R @@ -0,0 +1,5 @@ +#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for +#' the prior for the variance of noise around the spline. +#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for +#' the prior for the variance of noise around the spline. 
+#' diff --git a/man-roxygen/trunc.logitP.R b/man-roxygen/trunc.logitP.R new file mode 100644 index 0000000..3187d7f --- /dev/null +++ b/man-roxygen/trunc.logitP.R @@ -0,0 +1,2 @@ +#' @param trunc.logitP Truncate logit(P) between c(-trunc.logitP, trunc.logitP) when plotting the +#' logitP over time. Actual values of logit(P) are not affected. diff --git a/man-roxygen/u2.D.R b/man-roxygen/u2.D.R new file mode 100644 index 0000000..6c18dc6 --- /dev/null +++ b/man-roxygen/u2.D.R @@ -0,0 +1,3 @@ +#' @param u2 A numeric vector of the number of unmarked fish captured in each +#' stratum. These will be expanded by the capture efficiency to estimate the +#' population size in each stratum. diff --git a/man-roxygen/u2.ND.R b/man-roxygen/u2.ND.R new file mode 100644 index 0000000..7040655 --- /dev/null +++ b/man-roxygen/u2.ND.R @@ -0,0 +1,3 @@ +#' @param u2 A numeric vector of the number of unmarked fish captured in each +#' stratum. These will be expanded by the capture efficiency to estimate the +#' population size in each stratum. 
The length of u2 should be between the length of n1 and length n1 + number of columns in m2 -1 diff --git a/man/TimeStratPetersenDiagErrorWHChinook_fit.Rd b/man/TimeStratPetersenDiagErrorWHChinook_fit.Rd index ce5f5fc..8160101 100644 --- a/man/TimeStratPetersenDiagErrorWHChinook_fit.Rd +++ b/man/TimeStratPetersenDiagErrorWHChinook_fit.Rd @@ -45,7 +45,8 @@ TimeStratPetersenDiagErrorWHChinook2_fit( debug = FALSE, debug2 = FALSE, InitialSeed = ceiling(stats::runif(1, min = 0, 1e+06)), - save.output.to.files = TRUE + save.output.to.files = TRUE, + trunc.logitP = 15 ) TimeStratPetersenDiagErrorWHChinook_fit( @@ -82,7 +83,8 @@ TimeStratPetersenDiagErrorWHChinook_fit( debug = FALSE, debug2 = FALSE, InitialSeed = ceiling(stats::runif(1, min = 0, max = 1e+06)), - save.output.to.files = TRUE + save.output.to.files = TRUE, + trunc.logitP = 15 ) } \arguments{ @@ -193,6 +195,9 @@ in the MCMC iterations.} \item{save.output.to.files}{Should the plots and text output be save to the files in addition to being stored in the MCMC object?} +\item{trunc.logitP}{Truncate logit(P) between c(=trunc.logitP, trunc.logitP) when plotting the +logitP over time. 
Actual values of logit(P) are not affected.} + \item{u2.A}{A numeric vector of the number of unmarked fish with adipose clips captured in each stratum.} diff --git a/man/TimeStratPetersenDiagErrorWHSteel_fit.Rd b/man/TimeStratPetersenDiagErrorWHSteel_fit.Rd index a5c970b..cfe4aab 100644 --- a/man/TimeStratPetersenDiagErrorWHSteel_fit.Rd +++ b/man/TimeStratPetersenDiagErrorWHSteel_fit.Rd @@ -40,7 +40,8 @@ TimeStratPetersenDiagErrorWHSteel_fit( debug = FALSE, debug2 = FALSE, InitialSeed = ceiling(stats::runif(1, min = 0, 1e+06)), - save.output.to.files = TRUE + save.output.to.files = TRUE, + trunc.logitP = 15 ) } \arguments{ @@ -156,6 +157,9 @@ in the MCMC iterations.} \item{save.output.to.files}{Should the plots and text output be save to the files in addition to being stored in the MCMC object?} + +\item{trunc.logitP}{Truncate logit(P) between c(=trunc.logitP, trunc.logitP) when plotting the +logitP over time. Actual values of logit(P) are not affected.} } \value{ An MCMC object with samples from the posterior distribution. A diff --git a/man/TimeStratPetersenDiagError_fit.Rd b/man/TimeStratPetersenDiagError_fit.Rd index e6d0fb4..a0e6aba 100644 --- a/man/TimeStratPetersenDiagError_fit.Rd +++ b/man/TimeStratPetersenDiagError_fit.Rd @@ -38,7 +38,8 @@ TimeStratPetersenDiagError_fit( debug = FALSE, debug2 = FALSE, InitialSeed = ceiling(stats::runif(1, min = 0, max = 1e+06)), - save.output.to.files = TRUE + save.output.to.files = TRUE, + trunc.logitP = 15 ) } \arguments{ @@ -67,8 +68,8 @@ for more information.} \item{jump.after}{A numeric vector with elements belonging to \code{time}. In some cases, the spline fitting the population numbers should be allowed -to jump. For example, the population size could take a jump when hatchery -released fish suddenly arrive at the trap. The jumps occur AFTER the strata +to jump. For example, the population size could take a jump when hatchery +released fish suddenly arrive at the trap. 
The jumps occur AFTER the strata listed in this argument.} \item{bad.n1}{A numeric vector with elements belonging to \code{time}. In @@ -154,6 +155,9 @@ in the MCMC iterations.} \item{save.output.to.files}{Should the plots and text output be save to the files in addition to being stored in the MCMC object?} + +\item{trunc.logitP}{Truncate logit(P) between c(=trunc.logitP, trunc.logitP) when plotting the +logitP over time. Actual values of logit(P) are not affected.} } \value{ An MCMC object with samples from the posterior distribution. A diff --git a/man/TimeStratPetersenNonDiagErrorNPMarkAvail_fit.Rd b/man/TimeStratPetersenNonDiagErrorNPMarkAvail_fit.Rd index 915ab50..912bb01 100644 --- a/man/TimeStratPetersenNonDiagErrorNPMarkAvail_fit.Rd +++ b/man/TimeStratPetersenNonDiagErrorNPMarkAvail_fit.Rd @@ -42,7 +42,8 @@ TimeStratPetersenNonDiagErrorNPMarkAvail_fit( debug = FALSE, debug2 = FALSE, InitialSeed = ceiling(stats::runif(1, min = 0, max = 1e+06)), - save.output.to.files = TRUE + save.output.to.files = TRUE, + trunc.logitP = 15 ) } \arguments{ @@ -180,6 +181,9 @@ in the MCMC iterations.} \item{save.output.to.files}{Should the plots and text output be save to the files in addition to being stored in the MCMC object?} + +\item{trunc.logitP}{Truncate logit(P) between c(=trunc.logitP, trunc.logitP) when plotting the +logitP over time. Actual values of logit(P) are not affected.} } \value{ An MCMC object with samples from the posterior distribution. 
A diff --git a/man/TimeStratPetersenNonDiagErrorNP_fit.Rd b/man/TimeStratPetersenNonDiagErrorNP_fit.Rd index 665585f..8499887 100644 --- a/man/TimeStratPetersenNonDiagErrorNP_fit.Rd +++ b/man/TimeStratPetersenNonDiagErrorNP_fit.Rd @@ -41,7 +41,8 @@ TimeStratPetersenNonDiagErrorNP_fit( debug = FALSE, debug2 = FALSE, InitialSeed = ceiling(stats::runif(1, min = 0, 1e+06)), - save.output.to.files = TRUE + save.output.to.files = TRUE, + trunc.logitP = 15 ) } \arguments{ @@ -181,6 +182,9 @@ in the MCMC iterations.} \item{save.output.to.files}{Should the plots and text output be save to the files in addition to being stored in the MCMC object?} + +\item{trunc.logitP}{Truncate logit(P) between c(=trunc.logitP, trunc.logitP) when plotting the +logitP over time. Actual values of logit(P) are not affected.} } \value{ An MCMC object with samples from the posterior distribution. A diff --git a/man/TimeStratPetersenNonDiagError_fit.Rd b/man/TimeStratPetersenNonDiagError_fit.Rd index 5667602..0d4387a 100644 --- a/man/TimeStratPetersenNonDiagError_fit.Rd +++ b/man/TimeStratPetersenNonDiagError_fit.Rd @@ -37,7 +37,8 @@ TimeStratPetersenNonDiagError_fit( debug = FALSE, debug2 = FALSE, InitialSeed = ceiling(stats::runif(1, min = 0, 1e+06)), - save.output.to.files = TRUE + save.output.to.files = TRUE, + trunc.logitP = 15 ) } \arguments{ @@ -158,6 +159,9 @@ in the MCMC iterations.} \item{save.output.to.files}{Should the plots and text output be save to the files in addition to being stored in the MCMC object?} + +\item{trunc.logitP}{Truncate logit(P) between c(=trunc.logitP, trunc.logitP) when plotting the +logitP over time. Actual values of logit(P) are not affected.} } \value{ An MCMC object with samples from the posterior distribution. 
A diff --git a/render-vignettes.R b/render-vignettes.R index 96768df..e4806a7 100644 --- a/render-vignettes.R +++ b/render-vignettes.R @@ -20,7 +20,7 @@ files <- dir() files <- files[ grepl("Rmd$", files)] files -plyr::l_ply(files[5], function(x){ +plyr::l_ply(files, function(x){ cat("Starting to render ", x, "\n") rmarkdown::render(x) }, .parallel=run.parallel) diff --git a/vignettes/a-Diagonal-model.html b/vignettes/a-Diagonal-model.html index 6ab526c..8910396 100644 --- a/vignettes/a-Diagonal-model.html +++ b/vignettes/a-Diagonal-model.html @@ -12,7 +12,7 @@ - + Diagonal Case @@ -143,7 +143,7 @@

Diagonal Case

Carl James Schwarz

-

2021-10-09

+

2021-10-24

@@ -196,19 +196,19 @@

2021-10-09

-

1 Location of vignette source and code.

+

1 Location of vignette source and code.

Because of the length of time needed to run the vignettes, only static vignettes have been included with this package.

The original of the vignettes and the code can be obtained from the GitHub site at https://github.com/cschwarz-stat-sfu-ca/BTSPAS

-

2 Introduction

+

2 Introduction

The two-sample capture-recapture experiment is one of the simplest possible studies with a long history. The standard Lincoln-Petersen estimator used to estimate abundance is \[ \widehat{N} = \frac{n_1 n_2}{m_2}\] where \(n_1\) is the number of animals captured, marked and released at the first capture event; \(n_2\) is the number of animals captured at the second capture event; and \(m_2\) is the number of animals from \(n_1\) that were recaptured at the second event (i.e. the number of recaptured marked animals).

A key assumption of the Lincoln-Petersen estimator is that there is no correlation in capture-probabilities at the two events. The most common way in which this can occur is if the probability of capture is equal for all animals at either the first or second event.

If capture probabilities are heterogeneous, then estimates can be biased. One way to account for heterogeneous capture-probabilities is through stratification. For example, if males and females have different catchabilities, then separate Lincoln-Petersen estimators can be computed for each sex, and the estimated abundance for the entire population is found by summing the two estimated abundances (one for each sex).

Stratification can be based on animal attributes (e.g. sex), geographic location (e.g. parts of a study area), or temporal (e.g. when captured). The \(BTSPAS\) package deals with temporal stratification.

-

3 Experimental Protocol

+

3 Experimental Protocol

Consider an experiment to estimate the number of outgoing smolts on a small river. The run of smolts extends over several weeks. As smolts migrate, they are captured and marked with individually numbered tags and released at the first capture location using, for example, a fishwheel. The migration continues, and a second fishwheel takes a second sample several kilometers down stream. At the second fishwheel, the captures consist of a mixture of marked (from the first fishwheel) and unmarked fish.

The efficiency of the fishwheels varies over time in response to stream flow, run size passing the wheel and other uncontrollable events. So it is unlikely that the capture probabilities are equal over time at either location, i.e. are heterogeneous over time.

We suppose that we can temporally stratify the data into, for example, weeks, where the capture-probabilities are (mostly) homogeneous at each wheel in each week. Furthermore, suppose that fish captured and marked in each week tend to migrate together so that they are captured in a single subsequent stratum. For example, suppose that in each julian week \(j\), \(n1[j]\) fish are marked and released above the rotary screw trap. Of these, \(m2[j]\) are recaptured. All recaptures take place in the week of release, i.e. the matrix of releases and recoveries is diagonal. The \(n1[j]\) and \(m2[j]\) establish the capture efficiency of the second trap in julian week \(j\).

@@ -227,9 +227,9 @@

3 Experimental Protoco

Here the tagging and recapture events have been stratified into \(k\) temporal strata. Marked fish from one stratum tend to move at similar rates and so are recaptured together with unmarked fish. Recaptures of marked fish take place along the “diagonal.”

-

4 Example of basic BTSPAS fit.

+

4 Example of basic BTSPAS fit.

-

4.1 Reading in the data

+

4.1 Reading in the data

Because the matrix is diagonal, and because the \(u2\) vector is the same length as the \(n1\) and \(m2\) vectors, the data can be entered as several columns. Here is an example of some raw data:

demo.data.csv <- textConnection(
 'jweek, n1, m2, u2
@@ -317,7 +317,7 @@ 

4.1 Reading in the d

The first stratum was defined as julian week 9. At this time, 0 fish were captured, tagged, and released, but 4135 unmarked fish were recovered in the first recovery stratum. In the next week, 1465 fish were captured, tagged, and released, with 51 fish recaptured, and 10452 unmarked fish recovered.

-

4.2 Preliminary screening of the data

+

4.2 Preliminary screening of the data

A pooled-Petersen estimator would add all of the marked, recaptured and unmarked fish to give an estimate of 10,602,698,039 which seems unbelievable!

What went wrong? Let us first examine a plot of the estimated capture efficiency at the second trap for each (recovery) julian week.

@@ -354,7 +354,7 @@

4.2 Preliminary scre

-

4.3 Fitting the basic BTSPAS diagonal model

+

4.3 Fitting the basic BTSPAS diagonal model

The \(BTSPAS\) package attempts to strike a balance between the completely pooled Petersen estimator and the completely stratified Petersen estimator. In the former, capture probabilities are assumed to be equal for all fish in all strata, while in the latter, capture probabilities are allowed to vary among strata in no structured way. Furthermore, fish populations often have a general structure to the run, rather than arbitrarily jumping around from stratum to stratum.

Bonner and Schwarz (2011) developed a suite of models that add structure. In the basic model,

    @@ -418,134 +418,133 @@

    4.3 Fitting the basi #> Initial seed for JAGS set to: 890110 #> Random number seed for chain 127579 #> Random number seed for chain 693284 -#> Random number seed for chain 289944 -#> module glm loaded -#> Compiling model graph -#> Resolving undeclared variables -#> Allocating nodes -#> Graph information: -#> Observed stochastic nodes: 72 -#> Unobserved stochastic nodes: 104 -#> Total graph size: 1665 -#> -#> Initializing model +#> Random number seed for chain 289944 +#> Compiling model graph +#> Resolving undeclared variables +#> Allocating nodes +#> Graph information: +#> Observed stochastic nodes: 72 +#> Unobserved stochastic nodes: 104 +#> Total graph size: 1665 +#> +#> Initializing model +#> #> -#> - | - | | 0% - | - |++ | 4% - | - |++++ | 8% - | - |++++++ | 12% - | - |++++++++ | 16% - | - |++++++++++ | 20% - | - |++++++++++++ | 24% - | - |++++++++++++++ | 28% - | - |++++++++++++++++ | 32% - | - |++++++++++++++++++ | 36% - | - |++++++++++++++++++++ | 40% - | - |++++++++++++++++++++++ | 44% - | - |++++++++++++++++++++++++ | 48% - | - |++++++++++++++++++++++++++ | 52% - | - |++++++++++++++++++++++++++++ | 56% - | - |++++++++++++++++++++++++++++++ | 60% - | - |++++++++++++++++++++++++++++++++ | 64% - | - |++++++++++++++++++++++++++++++++++ | 68% - | - |++++++++++++++++++++++++++++++++++++ | 72% - | - |++++++++++++++++++++++++++++++++++++++ | 76% - | - |++++++++++++++++++++++++++++++++++++++++ | 80% - | - |++++++++++++++++++++++++++++++++++++++++++ | 84% - | - |++++++++++++++++++++++++++++++++++++++++++++ | 88% - | - |++++++++++++++++++++++++++++++++++++++++++++++ | 92% - | - |++++++++++++++++++++++++++++++++++++++++++++++++ | 96% - | - |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% -#> - | - | | 0% - | - |** | 4% - | - |**** | 8% - | - |****** | 12% - | - |******** | 16% - | - |********** | 20% - | - |************ | 24% - | - |************** | 28% - | - |**************** | 32% - | - |****************** | 36% - | - |******************** | 40% - | - 
|********************** | 44% - | - |************************ | 48% - | - |************************** | 52% - | - |**************************** | 56% - | - |****************************** | 60% - | - |******************************** | 64% - | - |********************************** | 68% - | - |************************************ | 72% - | - |************************************** | 76% - | - |**************************************** | 80% - | - |****************************************** | 84% - | - |******************************************** | 88% - | - |********************************************** | 92% - | - |************************************************ | 96% - | - |**************************************************| 100% + | + | | 0% + | + |++ | 4% + | + |++++ | 8% + | + |++++++ | 12% + | + |++++++++ | 16% + | + |++++++++++ | 20% + | + |++++++++++++ | 24% + | + |++++++++++++++ | 28% + | + |++++++++++++++++ | 32% + | + |++++++++++++++++++ | 36% + | + |++++++++++++++++++++ | 40% + | + |++++++++++++++++++++++ | 44% + | + |++++++++++++++++++++++++ | 48% + | + |++++++++++++++++++++++++++ | 52% + | + |++++++++++++++++++++++++++++ | 56% + | + |++++++++++++++++++++++++++++++ | 60% + | + |++++++++++++++++++++++++++++++++ | 64% + | + |++++++++++++++++++++++++++++++++++ | 68% + | + |++++++++++++++++++++++++++++++++++++ | 72% + | + |++++++++++++++++++++++++++++++++++++++ | 76% + | + |++++++++++++++++++++++++++++++++++++++++ | 80% + | + |++++++++++++++++++++++++++++++++++++++++++ | 84% + | + |++++++++++++++++++++++++++++++++++++++++++++ | 88% + | + |++++++++++++++++++++++++++++++++++++++++++++++ | 92% + | + |++++++++++++++++++++++++++++++++++++++++++++++++ | 96% + | + |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% +#> + | + | | 0% + | + |** | 4% + | + |**** | 8% + | + |****** | 12% + | + |******** | 16% + | + |********** | 20% + | + |************ | 24% + | + |************** | 28% + | + |**************** | 32% + | + |****************** | 36% + | + 
|******************** | 40% + | + |********************** | 44% + | + |************************ | 48% + | + |************************** | 52% + | + |**************************** | 56% + | + |****************************** | 60% + | + |******************************** | 64% + | + |********************************** | 68% + | + |************************************ | 72% + | + |************************************** | 76% + | + |**************************************** | 80% + | + |****************************************** | 84% + | + |******************************************** | 88% + | + |********************************************** | 92% + | + |************************************************ | 96% + | + |**************************************************| 100% +#> #> -#> -#> *** Finished JAGS ***

+#> *** Finished JAGS ***

The final parameter (save.output.to.files) can be set to automatically save plots and reports in files with the appropriate prefix in the working directory.

The output object contains all of the results and can be saved for later interrogations. This is useful if the run takes considerable time (e.g. overnight) and you want to save the results for later processing. Notice that I didn’t save the results below as part of this vignette.

# save the results in a data dump that can be read in later using the load() command.
 #save(list=c("demo.fit"), file="demo-fit-saved.Rdata")  # save the results from this run
-

4.4 The output from the basic fit

+

4.4 The output from the basic fit

The final object has many components

names(demo.fit)
#>  [1] "n.chains"           "n.iter"             "n.burnin"          
@@ -567,8 +566,8 @@ 

4.4 The output from

These plots are all created using the \(ggplot2\) packages, so the user can modify the plot (e.g. change titles etc).

The \(BTSPAS\) program also creates a report, which includes information about the data used in the fitting, the pooled- and stratified-Petersen estimates, a test for pooling, and summaries of the posterior. Only the first few lines are shown below:

head(demo.fit$report)
-#> [1] "Time Stratified Petersen with Diagonal recaptures and error in smoothed U -  Sat Oct  9 20:45:16 2021"
-#> [2] "Version: 2021-11-01 "                                                                                 
+#> [1] "Time Stratified Petersen with Diagonal recaptures and error in smoothed U -  Sun Oct 24 15:37:06 2021"
+#> [2] "Version: 2021-11-02 "                                                                                 
 #> [3] ""                                                                                                     
 #> [4] ""                                                                                                     
 #> [5] ""                                                                                                     
@@ -611,11 +610,11 @@ 

4.4 The output from

-

5 Fixing p’s

+

5 Fixing p’s

In some cases, the second trap is not running and so there are no recaptures of tagged fish and no captures of untagged fish.

We need to set the p’s in these strata to 0 rather than letting BTSPAS impute a value.

-

5.1 Reading in the data

+

5.1 Reading in the data

Here is an example of some raw data that is read in:

demo2.data.csv <- textConnection(
 'jweek, n1, m2, u2
@@ -703,7 +702,7 @@ 

5.1 Reading in the d

Notice that there are no recaptures of marked fish and no captures of unmarked fish in julian weeks 37, 38, 39, and 46 when the trap was not operating. Notice that this differs from the case where a small number of tagged fish are released with no recaptures but captures of untagged fish occur, such as in julian week 13.

-

5.2 Fitting the BTSPAS diagonal model and fixing p. 

+

5.2 Fitting the BTSPAS diagonal model and fixing p. 

Two additional arguments to the BTSPAS allow you specify the julian weeks in which the capture probability is fixed to a known (typically zero) value.

demo2.logitP.fixed <- c(37,38,39, 46)
 demo2.logitP.fixed.values <- rep(-10, length(demo2.logitP.fixed))
@@ -874,7 +873,7 @@

5.2 Fitting the BTSP #> *** Finished JAGS ***

-

5.3 The output from the fit

+

5.3 The output from the fit

Here is the fitted spline curve to the number of unmarked fish available in each recovery sample. Note how the spline interpolates across the julian weeks when no unmarked fish were captured in julian weeks 37, 38, 39, and 46 but the uncertainty is much larger.

demo2.fit$plots$fit.plot
@@ -910,10 +909,10 @@

5.3 The output from

-

6 Using covariates to model the p’s

+

6 Using covariates to model the p’s

BTSPAS also allows you to model the p’s with additional covariates, such as temperature, stream flow, etc. It is not possible to use covariates to model the total number of unmarked fish.

-

6.1 Reading in the data

+

6.1 Reading in the data

Here is an example of some raw data that includes the covariate \(log(flow)\):

demo3.data.csv <- textConnection(
 'jweek, n1, m2,    u2, logflow
@@ -1002,7 +1001,7 @@ 

6.1 Reading in the d

-

6.2 Fitting the BTSPAS diagonal model and fixing p and covariates for p. 

+

6.2 Fitting the BTSPAS diagonal model and fixing p and covariates for p. 

We need to create a matrix with the covariate values. We will need three columns - one for the intercept, the value of log(flow) and the square of its values. In practice, it is often advisable to standardize covariates to prevent numerical difficulties, but in this case, the values are small enough that standardization is not really needed.

demo3.logitP.cov <- cbind(1, demo3.data$logflow, demo3.data$logflow^2)
 head(demo3.logitP.cov)
@@ -1178,7 +1177,7 @@ 

6.2 Fitting the BTSP #> *** Finished JAGS ***

-

6.3 The output from the fit

+

6.3 The output from the fit

Here is the fitted spline curve to the number of unmarked fish available in each recovery sample.

demo3.fit$plots$fit.plot
@@ -1222,7 +1221,7 @@

6.3 The output from

-

7 Using covariates to model the p’s - prior information about beta coefficients

+

7 Using covariates to model the p’s - prior information about beta coefficients

BTSPAS also allows you to model the p’s with additional covariates, such as temperature, stream flow, etc. It is not possible to use covariates to model the total number of unmarked fish.

In some cases, you may have additional information about the effect of the covariates that you would like to incorporate into the analysis. For example, a rotary screw trap may have run for many years and plots of the relationship between the logit(catchability) vs. log(flow) generally shows a relationship that you may not be able to capture in a single year because of lack of contrast (i.e. the flow within a year does not vary enough) or because of smallish sample sizes.

BTSPAS allows you to specify prior information on the coefficients of the relationship between logit(catchability) and covariates.

@@ -1234,7 +1233,7 @@

7 Using covariates to
  • linear relationship between logit(catchability) and log(flow) with strong priors (and the prior information conflicts with the fit),
  • -

    7.1 Reading in the data

    +

    7.1 Reading in the data

    Here is an example of some (fictitious) raw data that includes the covariate \(log(flow)\):

    demo5.data.csv <- textConnection(
     'time,   n1,     m2,       u2,       logFlow
    @@ -1273,7 +1272,7 @@ 

    7.1 Reading in the d

    -

    7.2 Fitting the BTSPAS diagonal model with no covariates

    +

    7.2 Fitting the BTSPAS diagonal model with no covariates

    We start with fitting BTSPAS with no covariates

    fit1 <- BTSPAS::TimeStratPetersenDiagError_fit(
               title="no covariates"
    @@ -1442,7 +1441,7 @@ 

    7.2 Fitting the BTSP

There appears to be a relationship with log(flow) that has not been captured with this model.

    -

    7.3 Fitting the BTSPAS diagonal model with using log(flow) and default priors

    +

    7.3 Fitting the BTSPAS diagonal model with using log(flow) and default priors

    We need to create a matrix with the covariate values. We will need two columns - one for the intercept, and one for the value of log(flow). In practice, it is often advisable to standardize covariates to prevent numerical difficulties, but in this case, the values are small enough that standardization is not really needed.

    
     fit2 <- BTSPAS::TimeStratPetersenDiagError_fit(
    @@ -1615,7 +1614,7 @@ 

    7.3 Fitting the BTSP

    We now have a relationship with log(flow), but as you will see later, the evidence is not very strong.

    -

    7.4 Fitting the BTSPAS diagonal model with using log(flow) and weak prior

    +

    7.4 Fitting the BTSPAS diagonal model with using log(flow) and weak prior

    Prior information on the beta coefficients (the intercept and slope) are given using the prior.beta.logitP.mean and prior.beta.logitP.sd parameters in the call. The first specifies the values of the intercept and slope and the second specifies the uncertainty in these prior values.

    Consider the fit:

    
    @@ -1796,7 +1795,7 @@ 

    7.4 Fitting the BTSP

The weak information on the slope gives a boost to the relationship between log(flow) and the logit(catchability) especially for those weeks when the sample size is very small.

    -

    7.5 Fitting the BTSPAS diagonal model with using log(flow) and strong prior

    +

    7.5 Fitting the BTSPAS diagonal model with using log(flow) and strong prior

    We repeat the fit but with very strong prior information:

    
     fit4 <- BTSPAS::TimeStratPetersenDiagError_fit(
    @@ -1977,9 +1976,9 @@ 

    7.5 Fitting the BTSP

    Notice that the (strong) prior is now in conflict with the data. The model now believes that the variation around the line must be large, which allows it to move estimates of logit(catchability) below the line.

    -

    7.6 Comparing the results of the fits

    +

    7.6 Comparing the results of the fits

    -

    7.6.1 Estimates of abundance

    +

    7.6.1 Estimates of abundance

    Comparing estimates of abundance among fit @@ -2047,7 +2046,7 @@

    7.6.1 Estimates of

    There appears to be little impact on the estimate of abundance, but notice that the uncertainty declines as you add information from fit1 to *fit3(), but when you add a strong conflicting prior (see below), the uncertainty now increases.

    -

    7.6.2 Estimates of coefficients

    +

    7.6.2 Estimates of coefficients

    Comparing estimates of beta coefficients among fit @@ -2172,7 +2171,7 @@

    7.6.2 Estimates of

    With the strong prior, the data plays essentially no role in determining the slope and intercept. With a weak prior, the estimated slope has lower precision than with the even weaker (default) prior.

    -

    7.6.3 Comparing the residual standard deviation around the line of fit

    +

    7.6.3 Comparing the residual standard deviation around the line of fit

    Comparing estimates of residual variation in logitP among fit @@ -2240,7 +2239,7 @@

    7.6.3 Comparing th

    The residual variation declines as more information is added via the prior for the first 3 fits, but then increases when a strong (conflicting) prior is added (last fit).

    -

    7.6.4 Comparing the fits

    +

    7.6.4 Comparing the fits

    All fits: logitP vs log(flow)

    @@ -2269,10 +2268,10 @@

    7.6.4 Comparing th

    -

    8 Adding structure to the p’s with a spline.

    +

    8 Adding structure to the p’s with a spline.

    The previous example still showed some temporal structure in the \(p\)’s. This additional structure can be imposed by using a spline for the \(p\)’s.

    -

    8.1 Reading in the data

    +

    8.1 Reading in the data

    Here is an example of some raw data:

    demo4.data.csv <- textConnection(
     'jweek, n1, m2,    u2 
    @@ -2361,7 +2360,7 @@ 

    8.1 Reading in the d

    -

    8.2 Fitting the BTSPAS diagonal model and a spline for p. 

    +

    8.2 Fitting the BTSPAS diagonal model and a spline for p. 

    We can create a set of covariates that serve as the basis for the spline over time using the \(bs()\) function from the splines package:

    library(splines)
     demo4.logitP.cov <- bs(1:length(demo4.data$n1), df=floor(length(demo4.data$n1)/4), intercept=TRUE)
    @@ -2538,7 +2537,7 @@ 

    8.2 Fitting the BTSP #> *** Finished JAGS ***

    -

    8.3 The output from the fit

    +

    8.3 The output from the fit

    Here is the fitted spline curve to the number of unmarked fish available in each recovery sample.

    demo4.fit$plots$fit.plot
    @@ -2576,7 +2575,7 @@

    8.3 The output from

    -

    9 References

    +

    9 References

    Bonner, S. J., & Schwarz, C. J. (2011). Smoothing population size estimates for Time-Stratified Mark–Recapture experiments Using Bayesian P-Splines. Biometrics, 67, 1498–1507. https://doi.org/10.1111/j.1541-0420.2011.01599.x

    Schwarz, C. J., & Dempson, J. B. (1994). Mark-recapture estimation of a salmon smolt population. Biometrics, 50, 98–108.

    diff --git a/vignettes/b-Diagonal-model-with-multiple-ages.html b/vignettes/b-Diagonal-model-with-multiple-ages.html index df41d92..07de54e 100644 --- a/vignettes/b-Diagonal-model-with-multiple-ages.html +++ b/vignettes/b-Diagonal-model-with-multiple-ages.html @@ -12,7 +12,7 @@ - + Diagonal Case - Multiple Stocks/Ages @@ -141,7 +141,7 @@

    Diagonal Case - Multiple Stocks/Ages

    Carl James Schwarz

    -

    2021-10-09

    +

    2021-10-24

    @@ -174,21 +174,21 @@

    2021-10-09

    -

    1 Location of vignette source and code.

    +

    1 Location of vignette source and code.

    Because of the length of time needed to run the vignettes, only static vignettes have been included with this package.

    The original of the vignettes and the code can be obtained from the GitHub site at https://github.com/cschwarz-stat-sfu-ca/BTSPAS

    -

    2 Introduction

    +

    2 Introduction

In some cases, the population of fish consists of a mixture of ages (young of year, and juvenile) and stocks (wild and hatchery). BTSPAS has a number of routines to handle three common occurrences as explained below.

    In all cases, only diagonal recoveries are allowed.

    -

    2.1 Fixing values of \(p\) or using covariates.

    +

    2.1 Fixing values of \(p\) or using covariates.

    Refer to the vignette on the Diagonal Case for information about fixing values of \(p\) or modelling \(p\) using covariates such a stream flow or smoothing \(p\) using a temporal spline.

    -

    3 Wild and Hatchery Chinook

    +

    3 Wild and Hatchery Chinook

    In each stratum \(j\), \(n1[j]\) fish are marked and released above a rotary screw trap. Of these, \(m2[j]\) are recaptured in the stratum of release, i.e. the matrix of releases and recoveries is diagonal. The \(n1[j]\) and \(m2[j]\) establish the capture efficiency of the trap.

    At the same time, \(u2[j]\) unmarked fish are captured at the screw trap. These fish are a mixture of wild and hatchery raised Chinook Salmon. A portion (clip.rate) of the hatchery raised fish are adipose fin clipped and can be recognized as hatchery raised. The unclipped fish are a mixture of wild and hatchery fish which must be separated. Hence the \(u2[j]\) are separated into:

      @@ -196,7 +196,7 @@

      3 Wild and Hatchery Ch
    • \(u2.N[j]\) representing the number of unclipped fish which are mixture of hatchery and wild fish.
    -

    3.1 Reading in the data

    +

    3.1 Reading in the data

    Here is an example of some raw data that is read in:

    demo.data.csv <- textConnection(
     "     jweek,      n1,      m2,      u2.A,      u2.N
    @@ -276,7 +276,7 @@ 

    3.1 Reading in the d

    -

    3.2 Fitting the BTSPAS diagonal model

    +

    3.2 Fitting the BTSPAS diagonal model

    We already read in the data above. Here we set the rest of the parameters.

    • the hatch.after variable indicates the stratum after which hatchery fish are released
    • @@ -455,7 +455,7 @@

      3.2 Fitting the BTSP #> Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.

    -

    3.3 The output from the fit

    +

    3.3 The output from the fit

    Here is the fitted spline curve to the number of unmarked fish available in each recovery sample

    demo.fit$plots$fit.plot
    @@ -483,7 +483,7 @@

    3.3 The output from

    -

    4 Wild and Hatchery Chinook with YoY and Age 1 fish.

    +

    4 Wild and Hatchery Chinook with YoY and Age 1 fish.

    In this example, BTSPAS allows for separating wild from hatchery Chinook salmon when Age-1 Chinook Salmon are present (residualized) from last year.

    In each stratum \(j\), \(n1[j]\) fish are marked and released above a rotary screw trap. Of these, \(m2[j]\) are recaptured in the stratum of release, i.e. the matrix of releases and recoveries is diagonal. The \(n1[j]\) and \(m2[j]\) establish the capture efficiency of the trap.

    At the same time, \(u2[j]\) unmarked fish are captured at the screw trap. These fish are a mixture of YoY and Age-1 wild and hatchery raised Chinook Salmon. A portion (clip.rate.H.YoY, clip.rate.H.1) of the YoY and Age1 hatchery raised fish are adipose fin clipped and can be recognized as hatchery raised. The unclipped fish are a mixture of wild and hatchery fish which must be separated. Hence the \(u2[j]\) are separated into

    @@ -494,7 +494,7 @@

    4 Wild and Hatchery Ch
  • \(u2.N.1 [j]\) representing the number of Age1 unclipped fish) which are mixture of hatchery and wild fish.
  • -

    4.1 Reading in the data

    +

    4.1 Reading in the data

    Here is an example of some raw data that is read in:

    demo2.data.csv <- textConnection(
     "     jweek,      n1,      m2,      u2.A.YoY,      u2.N.YoY,      u2.A.1,      u2.N.1
    @@ -579,7 +579,7 @@ 

    4.1 Reading in the d

    -

    4.2 Fitting the BTSPAS diagonal model

    +

    4.2 Fitting the BTSPAS diagonal model

    We already read in the data above. Here we set the rest of the parameters.

    • the hatch.after variable indicates the stratum after which hatchery fish are released
    • @@ -765,11 +765,11 @@

      4.2 Fitting the BTSP #> *** Finished JAGS ***

    -

    4.3 The output from the fit

    +

    4.3 The output from the fit

    Here is the fitted spline curve to the number of unmarked fish available of both stocks and ages.

    demo2.fit$plots$fit.plot
    -Fitted spline curve +Fitted spline curve

    Fitted spline curve

    @@ -797,7 +797,7 @@

    4.3 The output from

    -

    5 Wild and Hatchery Steelhead with YoY and Age 1 fish.

    +

    5 Wild and Hatchery Steelhead with YoY and Age 1 fish.

In this analysis we fit a diagonal time-stratified Petersen estimator separating wild from hatchery Steelhead salmon.

    This differs from the Wild vs Hatchery Chinook salmon in previous sections in that all hatchery raised steelhead are marked, so there is complete separation by age and (wild/hatchery). There are 3 population of interest, Wild.YoY, Hatchery.Age1+, and Wild.Age1+.

    This analysis is based on the analysis of California Junction City 2003 Steelhead data and is the example used in the Trinity River Project.

    @@ -809,7 +809,7 @@

    5 Wild and Hatchery St
  • \(u2.H.1 [j]\) representing hatchery, age 1+ steelhead.
  • -

    5.1 Reading in the data

    +

    5.1 Reading in the data

    Here is an example of some raw data that is read in:

    demo3.data.csv <- textConnection(
     "     jweek,      n1,      m2,      u2.W.YoY,      u2.W.1,      u2.H.1
    @@ -902,7 +902,7 @@ 

    5.1 Reading in the d

    -

    5.2 Fitting the BTSPAS diagonal model

    +

    5.2 Fitting the BTSPAS diagonal model

    We already read in the data above. Here we set the rest of the parameters.

    • the hatch.after variable indicates the stratum after which hatchery fish are released
    • @@ -1082,7 +1082,7 @@

      5.2 Fitting the BTSP

The final parameter (save.output.to.files) can be set to automatically save plots and reports in files with the appropriate prefix in the working directory.

    -

    5.3 The output from the fit

    +

    5.3 The output from the fit

    Here is the fitted spline curve to the number of unmarked fish available of both stocks and ages.

    demo3.fit$plots$fit.plot
    @@ -1111,7 +1111,7 @@

    5.3 The output from

    -

    6 References

    +

    6 References

    Bonner, S. J., & Schwarz, C. J. (2011). Smoothing population size estimates for Time-Stratified Mark–Recapture experiments Using Bayesian P-Splines. Biometrics, 67, 1498–1507. https://doi.org/10.1111/j.1541-0420.2011.01599.x

    Schwarz, C. J., & Dempson, J. B. (1994). Mark-recapture estimation of a salmon smolt population. Biometrics, 50, 98–108.

    diff --git a/vignettes/c-Non-diagonal-model.html b/vignettes/c-Non-diagonal-model.html index ab6015a..eca6941 100644 --- a/vignettes/c-Non-diagonal-model.html +++ b/vignettes/c-Non-diagonal-model.html @@ -12,7 +12,7 @@ - + Non-Diagonal Case @@ -141,7 +141,7 @@

    Non-Diagonal Case

    Carl James Schwarz

    -

    2021-10-09

    +

    2021-10-24

    @@ -173,12 +173,12 @@

    2021-10-09

    -

    1 Location of vignette source and code.

    +

    1 Location of vignette source and code.

    Because of the length of time needed to run the vignettes, only static vignettes have been included with this package.

    The original of the vignettes and the code can be obtained from the GitHub site at https://github.com/cschwarz-stat-sfu-ca/BTSPAS

    -

    2 Introduction

    +

    2 Introduction

    This case represents a generalization of the diagonal case considered in a separate vignette. Now, rather than assuming that all recaptures from a release take place in a single recovery stratum, recoveries could take place over multiple recovery strata.

    Again, consider an experiment to estimate the number of outgoing smolts on a small river. The run of smolts extends over several weeks. As smolts migrate, they are captured and marked with individually numbered tags and released at the first capture location using, for example, a fishwheel. The migration continues, and a second fishwheel takes a second sample several kilometers down stream. At the second fishwheel, the captures consist of a mixture of marked (from the first fishwheel) and unmarked fish.

    The efficiency of the fishwheels varies over time in response to stream flow, run size passing the wheel and other uncontrollable events. So it is unlikely that the capture probabilities are equal over time at either location, i.e. are heterogeneous over time.

    @@ -199,14 +199,14 @@

    2 Introduction

    Here the tagging and recapture events have been stratified in to \(k\) temporal strata. Marked fish from one stratum tend to spread out and are recaptured over multiple strata. Several additional recovery strata are needed at the end of the experiment to fully capture the final release stratum.

    Because the lower diagonal of the recovery matrix is zero, the data can be entered in a shorthand fashion by showing the recoveries in the same stratum as release, the next stratum, etc, up to a maximum number of recovery strata per release.

    -

    2.1 Fixing values of \(p\) or using covariates.

    +

    2.1 Fixing values of \(p\) or using covariates.

    Refer to the vignette on the Diagonal Case for information about fixing values of \(p\) or modelling \(p\) using covariates such a stream flow or smoothing \(p\) using a temporal spline.

    -

    3 Example of basic non-diagonal BTSPAS fit.

    +

    3 Example of basic non-diagonal BTSPAS fit.

    -

    3.1 Reading in the data

    +

    3.1 Reading in the data

    Here is an example of some raw data that is read in:

    demo.data.csv <- textConnection(
     "Date        ,     n1  , X0  , X1  , X2  , X3  , X4
    @@ -317,7 +317,7 @@ 

    3.1 Reading in the d
    demo.data.m2 <- as.matrix(demo.data[,c("X0","X1","X2","X3","X4")])

    -

    3.2 Preliminary screening of the data

    +

    3.2 Preliminary screening of the data

    A pooled-Petersen estimator would add all of the marked, recaptured and unmarked fish to give an estimate of 73,385.

    Let us look at the data in more detail:

      @@ -341,7 +341,7 @@

      3.2 Preliminary scre

    -

    4 Fitting the BTSPAS non-diagonal model with a log-normal movement distribution.

    +

    4 Fitting the BTSPAS non-diagonal model with a log-normal movement distribution.

    Bonner and Schwarz (2011) developed a model with the following features.

    • Log-normal distribution of the distribution of times between release and availability at the second trap.
    • @@ -526,7 +526,7 @@

      4 Fitting the BTSPAS n #> *** Finished JAGS ***

    The final parameter (save.output.to.files) can be set to automatically to save plots and reports in files with the appropriate prefix in the working directory.

    -

    4.1 The output from the fit

    +

    4.1 The output from the fit

    The final object has many components

    names(demo.fit)
    #>  [1] "n.chains"           "n.iter"             "n.burnin"          
    @@ -548,8 +548,8 @@ 

    4.1 The output from

    These plots are all created using the \(ggplot2\) packages, so the user can modify the plot (e.g. change titles etc).

    The \(BTSPAS\) program also creates a report, which includes information about the data used in the fitting, the pooled- and stratified-Petersen estimates, a test for pooling, and summaries of the posterior. Only the first few lines are shown below:

    head(demo.fit$report)
    -#> [1] "Time Stratified Petersen with Non-Diagonal recaptures, error in smoothed U, and log-normal distribution for travel time -  Sat Oct  9 20:55:18 2021"
    -#> [2] "Version:  2021-11-01"                                                                                                                               
    +#> [1] "Time Stratified Petersen with Non-Diagonal recaptures, error in smoothed U, and log-normal distribution for travel time -  Sun Oct 24 15:46:59 2021"
    +#> [2] "Version:  2021-11-02"                                                                                                                               
     #> [3] ""                                                                                                                                                   
     #> [4] " Conne River 1987 Atlantic Salmon Smolts - Log-normal Results "                                                                                     
     #> [5] ""                                                                                                                                                   
    @@ -625,7 +625,7 @@ 

    4.1 The output from

    -

    5 Fitting the BTSPAS non-diagonal model with a non-parametric movement distribution.

    +

    5 Fitting the BTSPAS non-diagonal model with a non-parametric movement distribution.

    Bonner and Schwarz (2011) developed a model with the following features.

    • Non-parametric distribution of the distribution of times between release and availability at the second trap.
    • @@ -809,7 +809,7 @@

      5 Fitting the BTSPAS n #> *** Finished JAGS ***

    The final parameter (save.output.to.files) can be set to automatically to save plots and reports in files with the appropriate prefix in the working directory.

    -

    5.1 The output from the fit

    +

    5.1 The output from the fit

    Here is the fitted spline curve to the number of unmarked fish available in each recovery sample

    demo2.fit$plots$fit.plot
    @@ -865,7 +865,7 @@

    5.1 The output from

    -

    6 Prior information on the movement probabilities.

    +

    6 Prior information on the movement probabilities.

    It is possible to impose prior information on the movement probabilities in both cases. This would be useful in cases with VERY sparse data!

    In the non-parametric case, specify a vector that gives the relative weight of belief of movement. These are similar to a Dirchelet-type prior where the values representing belief in the distribution of travel times. For example, \(prior.muTT=c(1,4,3,2)\) represents a system where the maximum travel time is 3 strata after release with \(1/10=.1\) of the animals moving in the stratum of release \(4/10=.4\) of the animals taking 1 stratum to move etc So if \(prior.muTT=c(10,40,30,20)\), this represent the same movement pattern but a strong degree of belief because all of the numbers are larger. AN intuitive explanation is that the \(sum(prior.muTT)\) represents the number of animals observed to make this travel time distribution.

    Here we will fit a fairly strong prior on the movement probabilities:

    @@ -1037,7 +1037,7 @@

    6 Prior information on #> *** Finished JAGS ***

    The final parameter (save.output.to.files) can be set to automatically to save plots and reports in files with the appropriate prefix in the working directory.

    -

    6.1 The output from the fit

    +

    6.1 The output from the fit

    Here is the fitted spline curve to the number of unmarked fish available in each recovery sample

    demo3.fit$plots$fit.plot
    @@ -1093,7 +1093,7 @@

    6.1 The output from

    -

    7 References

    +

    7 References

    Bonner, S. J., & Schwarz, C. J. (2011). Smoothing population size estimates for Time-Stratified Mark–Recapture experiments Using Bayesian P-Splines. Biometrics, 67, 1498–1507. https://doi.org/10.1111/j.1541-0420.2011.01599.x

    Schwarz, C. J., & Dempson, J. B. (1994). Mark-recapture estimation of a salmon smolt population. Biometrics, 50, 98–108.

    diff --git a/vignettes/d-Non-diagonal-with-fall-back-model.html b/vignettes/d-Non-diagonal-with-fall-back-model.html index 74b74e2..c595cd0 100644 --- a/vignettes/d-Non-diagonal-with-fall-back-model.html +++ b/vignettes/d-Non-diagonal-with-fall-back-model.html @@ -12,7 +12,7 @@ - + Non-Diagonal Fall-Back Model @@ -141,7 +141,7 @@

    Non-Diagonal Fall-Back Model

    Carl James Schwarz

    -

    2021-10-09

    +

    2021-10-24

    @@ -164,14 +164,14 @@

    2021-10-09

    -

    1 Location of vignette source and code.

    +

    1 Location of vignette source and code.

    Because of the length of time needed to run the vignettes, only static vignettes have been included with this package.

    The original of the vignettes and the code can be obtained from the GitHub site at https://github.com/cschwarz-stat-sfu-ca/BTSPAS

    -

    2 Introduction

    +

    2 Introduction

    -

    2.1 Experimental set-up

    +

    2.1 Experimental set-up

    This case represents a generalization of the non-diagonal case considered in a separate vignette. Now we allow some fish (marked and unmarked) to approach the second trap, but fall back and never pass the trap. Schwarz and Bonner (2011) considered this model to estimate the number of steelhead that passed upstream of Moricetown Canyon.

    The experimental setup is the same as the non-diagonal case. Consider an experiment to estimate the number of outgoing smolts on a small river. The run of smolts extends over several weeks. As smolts migrate, they are captured and marked with individually numbered tags and released at the first capture location using, for example, a fishwheel. The migration continues, and a second fishwheel takes a second sample several kilometers down stream. At the second fishwheel, the captures consist of a mixture of marked (from the first fishwheel) and unmarked fish.

    The efficiency of the fishwheels varies over time in response to stream flow, run size passing the wheel and other uncontrollable events. So it is unlikely that the capture probabilities are equal over time at either location, i.e. are heterogeneous over time.

    @@ -193,7 +193,7 @@

    2.1 Experimental set

    Because the lower diagonal of the recovery matrix is zero, the data can be entered in a shorthand fashion by showing the recoveries in the same stratum as release, the next stratum, etc, up to a maximum number of recovery strata per release.

    -

    2.2 Fall-back information

    +

    2.2 Fall-back information

    This information is obtained by also marking radio-tagged fish whose ultimate fate (i.e. did they pass the second trap nor not) can be determined. We measure:

    • \(marked\_available\_n\) representing the number of radio-tagged fish.
    • @@ -203,14 +203,14 @@

      2.2 Fall-back inform

      This model could also be used for mortality between the marking and recovery trap.

    -

    2.3 Fixing values of \(p\) or using covariates.

    +

    2.3 Fixing values of \(p\) or using covariates.

    Refer to the vignette on the Diagonal Case for information about fixing values of \(p\) or modelling \(p\) using covariates such a stream flow or smoothing \(p\) using a temporal spline.

    -

    3 Example of non-diagonal model with fall-back.

    +

    3 Example of non-diagonal model with fall-back.

    -

    3.1 Reading in the data

    +

    3.1 Reading in the data

    Here is an example of some raw data that is read in:

    demo.data.csv <- textConnection("
     jweek,n1,    X0,X1 ,X2 ,X3,X4,X5,X6,X7 
    @@ -257,7 +257,7 @@ 

    3.1 Reading in the d demo.mark.available.x <- 40

    -

    3.2 Fitting the BTSPAS non-diagonal model with fall-back model with a non-parametric movement distribution.

    +

    3.2 Fitting the BTSPAS non-diagonal model with fall-back model with a non-parametric movement distribution.

    Schwarz and Bonner (2011) extended Bonner and Schwarz (2011) with a model with the following features.

    • Non-parametric distribution of the distribution of times between release and availability at the second trap.
    • @@ -442,7 +442,7 @@

      3.2 Fitting the BTSP #> *** Finished JAGS ***

    -

    3.3 The output from the fit

    +

    3.3 The output from the fit

    Here is the fitted spline curve to the number of unmarked fish available in each recovery stratum at the second trap

    demo.fit$plots$fit.plot
    @@ -509,7 +509,7 @@

    3.3 The output from

    -

    4 References

    +

    4 References

    Bonner, S. J., & Schwarz, C. J. (2011). Smoothing population size estimates for Time-Stratified Mark–Recapture experiments Using Bayesian P-Splines. Biometrics, 67, 1498–1507. https://doi.org/10.1111/j.1541-0420.2011.01599.x

    Schwarz, C. J. and Bonner, S. B. (2011). A spline-based capture-mark-recapture model applied to estimating the number of steelhead within the Bulkley River passing the Moricetown Canyon in 2001-2010. Prepared for the B.C. Ministry of Environment.

    Schwarz, C. J., & Dempson, J. B. (1994). Mark-recapture estimation of a salmon smolt population. Biometrics, 50, 98–108.

    diff --git a/vignettes/e-Bias-from-incomplete-sampling.html b/vignettes/e-Bias-from-incomplete-sampling.html index 2e8db7b..92fcfb8 100644 --- a/vignettes/e-Bias-from-incomplete-sampling.html +++ b/vignettes/e-Bias-from-incomplete-sampling.html @@ -12,7 +12,7 @@ - + Bias caused by incomplete sampling @@ -141,7 +141,7 @@

    Bias caused by incomplete sampling

    Carl James Schwarz

    -

    2021-10-09

    +

    2021-10-24

    @@ -169,22 +169,22 @@

    2021-10-09

    -

    1 Location of vignette source and code.

    +

    1 Location of vignette source and code.

    Because of the length of time needed to run the vignettes, only static vignettes have been included with this package.

    The original of the vignettes and the code can be obtained from the GitHub site at https://github.com/cschwarz-stat-sfu-ca/BTSPAS

    -

    2 Introduction

    +

    2 Introduction

    This document will illustrate the potential biases caused by incomplete sampling in the recovery strata. For example, suppose that stratification is at a weekly level. Fish are tagged and released continuously during the week. Recoveries occur from a commercial fishery that only operating for 1/2 a week (the first half). This may cause bias in estimates of abundance because, for example, fish tagged at the end of a week, may arrive at the commercial fishery in the second half of the recovery week and not be subject to capture. This causes heterogeneity in recovery probabilities that is not accounted for in the mark-recapture analysis.

    A simulated population will be created and then analyzed in several ways to illustrate the potential extent of bias, and how to properly stratify the data to account for this problem.

    This scenario was originally envisioned to be handled with the sampfrac argument of the BTSPAS routines. However, the actual implementation is incorrect in BTSPAS and is deprecated. This vignette shows the proper way to deal with this problem.

    -

    2.1 Experimental setup

    +

    2.1 Experimental setup

    This simulated population is modelled around a capture-capture experiment on the Taku River which flows between the US and Canada.

    Returning salmon arrive and are captured at a fish wheel during several weeks. Those fish captured at the fish wheel are tagged and released (daily). They migrate upstream to a commercial fishery. The commercial fishery does not operate on all days of the week - in particular, the fishery tends to operate during the first part of the week until the quota for catch is reached. Then the fishery stops until the next week.

    -

    2.2 Generation of population

    +

    2.2 Generation of population

    A population of 150,000 fish will be simulated arriving at the fish wheels according to a normal distribution with a mean of 42 and a standard deviation of 15. This gives a distribution of arrival times at the fish wheel of

    Distribution of arrival time at wheel @@ -284,14 +284,14 @@

    2.2 Generation of po

    -

    3 Analysis of dataset stratified to the 3-day strata.

    +

    3 Analysis of dataset stratified to the 3-day strata.

    We are now back to familiar territory.

    -

    3.1 Pooled Petersen estimator

    +

    3.1 Pooled Petersen estimator

    The pooled Petersen estimator of abundance is 131,314 (SE 7,623 ). Notice the negative bias in the estimate as predicted.

    -

    3.2 BTSPAS on the full dataset

    +

    3.2 BTSPAS on the full dataset

    We prepare the data in the usual way with the following results:

    ## Stratum
    ##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
    @@ -492,12 +492,12 @@

    3.2 BTSPAS ## *** Finished JAGS ***

    -

    3.3 Exploring the output

    +

    3.3 Exploring the output

    • The revised fitted run curve of the unmarked individuals (the recovery sample would be added to this curve)
    -Fitted run curve with zeroes +Fitted run curve with zeroes

    Fitted run curve with zeroes

    @@ -568,7 +568,7 @@

    3.3 Exploring the ou

    -

    4 Analysis at the 6-day stratum level.

    +

    4 Analysis at the 6-day stratum level.

    Many of the analyses stratify to the statistical week, so only part of the week is fished by the commercial fishery. As noted previously, if the fish wheels sample a constant proportion of the run, then it doesn’t matter how the recovery sample is obtained – the Pooled Petersen estimator will still be unbiased.

    We simulate the coarser stratification by taking the previous simulated population and pool adjacent 3-day strata. The pooled data is:

    ##          tagged S1 S2  S3  S4  S5  S6  S7  S8  S9 S10 S11 S12 S13 S14 S15 S16 S17 S18 S19 S20
    @@ -592,11 +592,11 @@ 

    4 Analysis at the 6-da ## untagged NA 14 52 132 285 383 415 396 425 405 418 384 385 187 89 0 0 0 0 0 0

    Notice that no recovery strata are now zero (except at the end of the study)

    -

    4.1 Pooled Petersen estimator

    +

    4.1 Pooled Petersen estimator

    The pooled Petersen estimator of abundance is the same as before as there are no changes to the number tagged, recaptured, or fished. 131,314 (SE 7,623 ).

    -

    4.2 BTSPAS on the pooled dataset

    +

    4.2 BTSPAS on the pooled dataset

    We prepare the data in the usual way with the following results:

    ## Stratum
    ##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
    @@ -776,7 +776,7 @@

    4.2 BTSPAS ## *** Finished JAGS ***

    -

    4.3 Exploring the output

    +

    4.3 Exploring the output

    • The revised fitted run curve of the unmarked individuals (the recovery sample would be added to this curve)
    @@ -832,7 +832,7 @@

    4.3 Exploring the ou

    -

    5 References

    +

    5 References

    Bonner Simon, J., & Schwarz Carl, J. (2011). Smoothing Population Size Estimates for Time-Stratified MarkRecapture Experiments Using Bayesian P-Splines. Biometrics, 67, 1498–1507. https://doi.org/10.1111/j.1541-0420.2011.01599.x

    Darroch, J. N. (1961). The two-sample capture-recapture census when tagging and sampling are stratified. Biometrika, 48, 241–260. https://doi.org/10.1093/biomet/48.3-4.241

    Plante, N., L.-P Rivest, and G. Tremblay. (1988). Stratified Capture-Recapture Estimation of the Size of a Closed Population. Biometrics 54, 47-60. https://doi.org/10.2307/2533994

    diff --git a/vignettes/f-Interpolating-run-earlier-and-later.html b/vignettes/f-Interpolating-run-earlier-and-later.html index a77f13a..7e4db4d 100644 --- a/vignettes/f-Interpolating-run-earlier-and-later.html +++ b/vignettes/f-Interpolating-run-earlier-and-later.html @@ -12,7 +12,7 @@ - + Interpolating run early and late @@ -46,7 +46,7 @@

    Interpolating run early and late

    Carl James Schwarz

    -

    2021-10-09

    +

    2021-10-24

    @@ -60,7 +60,7 @@

    2021-10-09

    -

    1 Introduction

    +

    1 Introduction

    In some studies, harvest (recovery strata) start after the run has started and terminate prior to the run ending. For example, consider the following recovery matrix where releases and recoveries have been stratified on a weekly basis:

    ##      Tagging SW22 SW23 SW24 SW25 SW26 SW27 SW28 SW29 SW30 SW31 SW32 SW33 SW34 SW35 SW36 SW37 Applied
     ## 1       SW22    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0      10
    @@ -82,7 +82,7 @@ 

    1 Introduction

    ## 17 CatchComm 0 0 0 1869 5394 5131 5668 6733 1780 1828 2493 157 0 0 0 0 NA

    The bottom line is the total recoveries (tagged and untagged) from a commercial harvest. In this case, the commercial harvest did not start until statistical week SW25 and ended in SW33 but the run started earlier and ended later than the commercial harvest.

    -

    1.1 Fit with the current data.

    +

    1.1 Fit with the current data.

    We now fit the BTSPAS model using the current data

    ## 
     ## 
    @@ -245,7 +245,7 @@ 

    1.1 Fit with the cur

    The problem is that without a commercial catch in the first 3 and last 3 weeks, there is no information about the probability of capture for those weeks and BTSPAS simply interpolates the spline from the middle of the data to the first 3 and last 3 weeks. The interpolation for the last 3 weeks isn’t too bad – the spline is already on a downwards trend and so this is continued. However, the interpolation back for the first 3 weeks is not very realistic

    -

    1.2 Forcing the run curve to zero.

    +

    1.2 Forcing the run curve to zero.

    It is possible to “force” BTSPAS to interpolate the first 3 and last 3 weeks down to zero by adding ``fake’’ data. In particular, we pretend that in the first 3 and last 3 weeks, that a commercial catch of 1 fish occurred and it was tagged. You also need to ensure that enough fish were tagged and released to accommodate the fake data.

    The revised recovery matrix is:

    ##      Tagging SW22 SW23 SW24 SW25 SW26 SW27 SW28 SW29 SW30 SW31 SW32 SW33 SW34 SW35 SW36 SW37 Applied
    @@ -405,7 +405,7 @@ 

    1.2 Forcing the run
    ## [1] TRUE
    ## [1] TRUE

    Notice that in the revised fit, the run curve is forced to 0 at the start and end of the study:

    -

    +

    The estimates of total run size and the weekly estimates of the runsize are also more sensible:

    ##         mean   sd   2.5%  97.5%
     ## Ntot  126876 3935 119430 134994