Skip to content

Commit

Permalink
Fixed problem when plotting logitP or logU when extreme values presen…
Browse files Browse the repository at this point in the history
…t (issue #30).
  • Loading branch information
cschwarz-stat-sfu-ca committed Oct 24, 2021
1 parent 1be18a7 commit 83e39b4
Show file tree
Hide file tree
Showing 38 changed files with 527 additions and 497 deletions.
4 changes: 2 additions & 2 deletions CRAN-RELEASE
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
This package was submitted to CRAN on 2021-10-09.
Once it is accepted, delete this file and tag the release (commit 7d17b50).
This package was submitted to CRAN on 2021-10-24.
Once it is accepted, delete this file and tag the release (commit 1be18a7).
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BTSPAS
Version: 2021.11.1
Date: 2021-11-01
Version: 2021.11.2
Date: 2021-11-02
Title: Bayesian Time-Stratified Population Analysis
Author: Carl J Schwarz <[email protected]> and
Simon J Bonner <[email protected]>
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# BTSPAS 2011.11.2

* Added trunc.logitP to truncate logit(P) when plotting to avoid problems with extreme cases (issue #30)
* Editorial changes

# BTSPAS 2011.11.1

* Fixed importing sd() from stat package that conflicts with revised sd() in actuar package
Expand Down
2 changes: 1 addition & 1 deletion R/RunTime.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
RunTime <- function(time, U, prob=seq(0,1,.1)) {
timing <- c(min(time):(1+max(time)))
q.U <- plyr::adply(U, 1, function(U.sample, timing){
quant <- stats::quantile(actuar::grouped.data(Group=timing, Frequency=U.sample), prob=prob)
quant <- stats::quantile(actuar::grouped.data(Group=timing, Frequency=U.sample), prob=prob, na.rm=TRUE)
quant
}, timing=timing, .id=NULL)
q.U
Expand Down
17 changes: 14 additions & 3 deletions R/TimeStratPetersenDiagErrorWHChinook2_fit.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 2021-10-23 CJS Added trunc.logitP to fix plotting problems with extreme logitP
# 2020-12-15 CJS Removed all references to sampfrac in the code
# 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP
# 2018-12-19 CJS deprecation of sampling fraction
Expand Down Expand Up @@ -37,12 +38,13 @@ TimeStratPetersenDiagErrorWHChinook2_fit<-
run.prob=seq(0,1,.1), # what percentiles of run timing are wanted
debug=FALSE, debug2=FALSE,
InitialSeed=ceiling(stats::runif(1,min=0,1000000)),
save.output.to.files=TRUE) {
save.output.to.files=TRUE,
trunc.logitP=15) {
# Fit a Time Stratified Petersen model with diagonal entries and with smoothing on U allowing for random error,
# covariates for the the capture probabilities, and separating the YoY and Age1 wild vs hatchery fish
# The "diagonal entries" implies that no marked fish are recaptured outside the (time) stratum of release
#
version <- '2021-11-01'
version <- '2021-11-02'
options(width=200)

# Input parameters are
Expand Down Expand Up @@ -630,6 +632,14 @@ if (debug)
get.est("H.1" ,plot.df, hatch.after.YoY, results),
get.est("W.YoY",plot.df, hatch.after.YoY, results),
get.est("W.1" ,plot.df, hatch.after.YoY, results))

# add limits to the plot to avoid non-monotone secondary axis problems with extreme values
plot.data$logUguess <- pmax(-10 , pmin(20, plot.data$logUguess))
plot.data$logU <- pmax(-10 , pmin(20, plot.data$logU ))
plot.data$logUlcl <- pmax(-10 , pmin(20, plot.data$logUlcl ))
plot.data$logUucl <- pmax(-10 , pmin(20, plot.data$logUucl ))
plot.data$spline<- pmax(-10 , pmin(20, plot.data$spline))

fit.plot <- ggplot(data=plot.data, aes_(x=~time, color=~group))+
ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals")+
geom_point(aes_(y=~logUguess), shape=16, position=position_dodge(width=.2))+ # guesses for population
Expand Down Expand Up @@ -658,7 +668,8 @@ results$plots$fit.plot <- fit.plot

# plot logit P vs time
logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2,
u2=u2.A.YoY+u2.N.YoY+u2.A.1+u2.N.1, logitP.cov=new.logitP.cov, results=results)
u2=u2.A.YoY+u2.N.YoY+u2.A.1+u2.N.1, logitP.cov=new.logitP.cov, results=results,
trunc.logitP=trunc.logitP)
if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in")
results$plots$logitP.plot <- logitP.plot

Expand Down
78 changes: 39 additions & 39 deletions R/TimeStratPetersenDiagErrorWHChinook_fit.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 2021-10-23 CJS Added trunc.logitP parameter to avoid plotting problems
# 2020-12-15 CJS Removed all uses of sampfrac in the code
# 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP
# 2018-12-19 CSJ deprecated use of sampling fraction
Expand Down Expand Up @@ -84,33 +85,15 @@
#' These are set to NA prior to the fit.
#' @template logitP.cov
#' @template mcmc-parms
#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the
#' prior for the variance of the random noise for the smoothing spline.
#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the
#' prior for the variance of the random noise for the smoothing spline.
#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for
#' the prior for the variance of noise around the spline.
#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for
#' the prior for the variance of noise around the spline.
#' @param prior.beta.logitP.mean Mean of the prior normal distribution for
#' logit(catchability) across strata
#' @param prior.beta.logitP.sd SD of the prior normal distribution for
#' logit(catchability) across strata
#' @param tauP.alpha One of the parameters for the prior for the variance in
#' logit(catchability) among strata
#' @param tauP.beta One of the parameters for the prior for the variance in
#' logit(catchability) among strata
#' @param run.prob Numeric vector indicating percentiles of run timing should
#' be computed.
#' @param debug Logical flag indicating if a debugging run should be made. In
#' the debugging run, the number of samples in the posterior is reduced
#' considerably for a quick turn around.
#' @param debug2 Logical flag indicated if additional debugging information is
#' produced. Normally the functions will halt at \code{browser()} calls to
#' allow the user to peek into the internal variables. Not useful except to
#' package developers.
#' @template tauU.alpha.beta
#' @template taueU.alpha.beta
#' @template prior.beta.logitP.mean.sd
#' @template tauP.alpha.beta
#' @template run.prob
#' @template debug
#' @template InitialSeed
#' @template save.output.to.files
#' @template trunc.logitP

#' @return An MCMC object with samples from the posterior distribution. A
#' series of graphs and text file are also created in the working directory.
Expand Down Expand Up @@ -138,12 +121,13 @@ TimeStratPetersenDiagErrorWHChinook_fit<-
run.prob=seq(0,1,.1), # what percentiles of run timing are wanted
debug=FALSE, debug2=FALSE,
InitialSeed=ceiling(stats::runif(1,min=0, max=1000000)),
save.output.to.files=TRUE) {
save.output.to.files=TRUE,
trunc.logitP=15) {
# Fit a Time Stratified Petersen model with diagonal entries and with smoothing on U allowing for random error,
# covariates for the the capture probabilities, and separating the wild vs hatchery fish
# The "diagonal entries" implies that no marked fish are recaptured outside the (time) stratum of release
#
version <- '2021-11-01'
version <- '2021-11-02'
options(width=200)

# Input parameters are
Expand Down Expand Up @@ -666,18 +650,18 @@ if (debug2) {
results.row.names <- rownames(results$summary)
etaU.W.row.index <- grep("etaU.W", results.row.names)
etaU.W <- results$summary[etaU.W.row.index,]
plot.df$logU.W =etaU.W[,"mean"]
plot.df$lcl.W =etaU.W[,"2.5%"]
plot.df$ucl.W =etaU.W[,"97.5%"]
plot.df$logU.W = etaU.W[,"mean"]
plot.df$logUlcl.W = etaU.W[,"2.5%"]
plot.df$logUucl.W = etaU.W[,"97.5%"]

etaU.H.row.index <- grep("etaU.H", results.row.names)
etaU.H <- results$summary[etaU.H.row.index,]
plot.df$logU.H =etaU.H[,"mean"]
plot.df$lcl.H =etaU.H[,"2.5%"]
plot.df$ucl.H =etaU.H[,"97.5%"]
plot.df$logU.H = etaU.H[,"mean"]
plot.df$logUlcl.H = etaU.H[,"2.5%"]
plot.df$logUucl.H = etaU.H[,"97.5%"]
plot.df$logU.H [1:(hatch.after - min(time)+1)] <- NA # no hatchery fish until release at hatch.after
plot.df$lcl.H [1:(hatch.after - min(time)+1)] <- NA
plot.df$ucl.H [1:(hatch.after - min(time)+1)] <- NA
plot.df$logUlcl.H [1:(hatch.after - min(time)+1)] <- NA
plot.df$logUucl.H [1:(hatch.after - min(time)+1)] <- NA

# extract the spline values for W (wild) and H (hatchery) fish
logUne.W.row.index <- grep("logUne.W", results.row.names)
Expand All @@ -686,18 +670,32 @@ if (debug2) {
plot.df$spline.H <- results$summary[logUne.H.row.index,"mean"]
plot.df$spline.H [1:(hatch.after - min(time)+1)] <- NA # no hatchery fish until release at hatch.after

# add limits to the plot to avoid non-monotone secondary axis problems with extreme values
plot.df$logUguess.W <- pmax(-10 , pmin(20, plot.df$logUguess.W))
plot.df$logUguess.H <- pmax(-10 , pmin(20, plot.df$logUguess.H))
plot.df$logU.W <- pmax(-10 , pmin(20, plot.df$logU.W ))
plot.df$logU.H <- pmax(-10 , pmin(20, plot.df$logU.H ))
plot.df$logUlcl.W <- pmax(-10 , pmin(20, plot.df$logUlcl.W ))
plot.df$logUlcl.H <- pmax(-10 , pmin(20, plot.df$logUlcl.H ))
plot.df$logUucl.W <- pmax(-10 , pmin(20, plot.df$logUucl.W ))
plot.df$logUucl.H <- pmax(-10 , pmin(20, plot.df$logUucl.H ))
plot.df$spline.W <- pmax(-10 , pmin(20, plot.df$spline.W))
plot.df$spline.H <- pmax(-10 , pmin(20, plot.df$spline.H))


fit.plot <- ggplot(data=plot.df, aes_(x=~time))+
ggtitle(title, subtitle="Fitted spline curve to raw U.W[i] U.H[i] with 95% credible intervals")+
geom_point(aes_(y=~logUguess.W), color="red", shape="w")+ # guesses for wild file
geom_point(aes_(y=~logUguess.H), color="green", shape="h")+ # guesses for hatchery fish
xlab("Time Index\nFitted/Smoothed/Raw values plotted for W(black) and H(blue)")+ylab("log(U[i]) + 95% credible interval")+
xlab("Time Index\nFitted/Smoothed/Raw values plotted for W(black) and H(blue)")+
ylab("log(U[i]) + 95% credible interval")+
geom_point(aes_(y=~logU.W), color="black", shape=19)+
geom_line (aes_(y=~logU.W), color="black")+
geom_errorbar(aes_(ymin=~lcl.W, ymax=~ucl.W), width=.1)+
geom_errorbar(aes_(ymin=~logUlcl.W, ymax=~logUucl.W), width=.1)+
geom_line(aes_(y=~spline.W),linetype="dashed") +
geom_point(aes_(y=~logU.H), color="blue", shape=19)+
geom_line (aes_(y=~logU.H), color="blue")+
geom_errorbar(aes_(ymin=~lcl.H, ymax=~ucl.H), width=.1, color="blue")+
geom_errorbar(aes_(ymin=~logUlcl.H, ymax=~logUucl.H), width=.1, color="blue")+
geom_line(aes_(y=~spline.H),linetype="dashed",color="blue")+
ylim(c(-2,NA))+
scale_x_continuous(breaks=seq(min(plot.df$time, na.rm=TRUE),max(plot.df$time, na.rm=TRUE),2))+
Expand All @@ -716,7 +714,9 @@ if(save.output.to.files)ggsave(plot=fit.plot, filename=paste(prefix,"-fit.pdf",s
results$plots$fit.plot <- fit.plot

# Plot the logitP over time
logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2, u2=new.u2.A+new.u2.N, logitP.cov=new.logitP.cov, results=results)
logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2, u2=new.u2.A+new.u2.N,
logitP.cov=new.logitP.cov, results=results,
trunc.logitP=trunc.logitP)
if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in", dpi=300)
results$plots$logitP.plot <- logitP.plot

Expand Down
53 changes: 23 additions & 30 deletions R/TimeStratPetersenDiagErrorWHSteel_fit.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# 2021-10-24 CJS Added trunc.logitP to fix plotting problems with extreme values of logitP
# 2020-12-15 CJS Remove sampfrac from code
# 2020-11-07 CJS Allowed user to specify prior for beta coefficient for logitP
# 2018-12-19 CJS deprecated use of sampling fraction
Expand Down Expand Up @@ -67,33 +68,15 @@
#' hatchery unmarked (but adipose fin clipped) age 1+ fish should be ignored.
#' @template logitP.cov
#' @template mcmc-parms
#' @param tauU.alpha One of the parameters along with \code{tauU.beta} for the
#' prior for the variance of the random noise for the smoothing spline.
#' @param tauU.beta One of the parameters along with \code{tauU.alpha} for the
#' prior for the variance of the random noise for the smoothing spline.
#' @param taueU.alpha One of the parameters along with \code{taueU.beta} for
#' the prior for the variance of noise around the spline.
#' @param taueU.beta One of the parameters along with \code{taueU.alpha} for
#' the prior for the variance of noise around the spline.
#' @param prior.beta.logitP.mean Mean of the prior normal distribution for
#' logit(catchability) across strata
#' @param prior.beta.logitP.sd SD of the prior normal distribution for
#' logit(catchability) across strata
#' @param tauP.alpha One of the parameters for the prior for the variance in
#' logit(catchability) among strata
#' @param tauP.beta One of the parameters for the prior for the variance in
#' logit(catchability) among strata
#' @param run.prob Numeric vector indicating percentiles of run timing should
#' be computed.
#' @param debug Logical flag indicating if a debugging run should be made. In
#' the debugging run, the number of samples in the posterior is reduced
#' considerably for a quick turn around.
#' @param debug2 Logical flag indicated if additional debugging information is
#' produced. Normally the functions will halt at \code{browser()} calls to
#' allow the user to peek into the internal variables. Not useful except to
#' package developers.
#' @template tauU.alpha.beta
#' @template taueU.alpha.beta
#' @template prior.beta.logitP.mean.sd
#' @template tauP.alpha.beta
#' @template run.prob
#' @template debug
#' @template InitialSeed
#' @template save.output.to.files
#' @template save.output.to.files
#' @template trunc.logitP
#'
#' @return An MCMC object with samples from the posterior distribution. A
#' series of graphs and text file are also created in the working directory.
Expand Down Expand Up @@ -121,13 +104,14 @@ TimeStratPetersenDiagErrorWHSteel_fit <-
run.prob=seq(0,1,.1), # what percentiles of run timing are wanted
debug=FALSE, debug2=FALSE,
InitialSeed=ceiling(stats::runif(1,min=0, 1000000)),
save.output.to.files=TRUE) {
save.output.to.files=TRUE,
trunc.logitP=15) {
# Fit a Time Stratified Petersen model with diagonal entries and with smoothing on U allowing for random error,
# covariates for the the capture probabilities, and separating the wild vs hatchery fish for STEELHEAD releases
# The steelhead are nice because 100% of hatchery fish are adipose fin clipped and no wild fish are adipose fin clipped
# The "diagonal entries" implies that no marked fish are recaptured outside the (time) stratum of release
#
version <- '2021-11-01'
version <- '2021-11-02'
options(width=200)

# Input parameters are
Expand Down Expand Up @@ -223,7 +207,7 @@ if(!all(bad.u2.W.1 %in% time, na.rm=TRUE)){
if(!all(bad.u2.H.1 %in% time, na.rm=TRUE)){
cat("***** ERROR ***** bad.u2.H.1 must be elements of strata identifiers. You entered \n bad.u2.H.1:",
paste(bad.u2.H.1,collapse=","),"\n Strata identifiers are \n time:",
paste(time, ,collapse=","), "\n")
paste(time ,collapse=","), "\n")
return()}
if(!all(hatch.after %in% time, na.rm=TRUE)){
cat("***** ERROR ***** hatch.after must be elements of strata identifiers. You entered \n hatch.after:",
Expand Down Expand Up @@ -621,6 +605,14 @@ if (debug)
plot.data <-rbind( get.est("H.1" ,plot.df, hatch.after, results),
get.est("W.YoY",plot.df, hatch.after, results),
get.est("W.1" ,plot.df, hatch.after, results))

# add limits to the plot to avoid non-monotone secondary axis problems with extreme values
plot.data$logUguess <- pmax(-10 , pmin(20, plot.data$logUguess))
plot.data$logU <- pmax(-10 , pmin(20, plot.data$logU ))
plot.data$logUlcl <- pmax(-10 , pmin(20, plot.data$logUlcl ))
plot.data$logUucl <- pmax(-10 , pmin(20, plot.data$logUucl ))
plot.data$spline <- pmax(-10 , pmin(20, plot.data$spline))

fit.plot <- ggplot(data=plot.data, aes_(x=~time, color=~group))+
ggtitle(title, subtitle="Fitted spline curve with 95% credible intervals")+
geom_point(aes_(y=~logUguess), shape=16, position=position_dodge(width=.2))+ # guesses for population
Expand Down Expand Up @@ -650,7 +642,8 @@ results$plots$fit.plot <- fit.plot

# plot the estimated logits over time
logitP.plot <- plot_logitP(title=title, time=new.time, n1=new.n1, m2=new.m2,
u2=new.u2.W.YoY+new.u2.W.1+new.u2.H.1, logitP.cov=new.logitP.cov, results=results)
u2=new.u2.W.YoY+new.u2.W.1+new.u2.H.1, logitP.cov=new.logitP.cov, results=results,
trunc.logitP=trunc.logitP)
if(save.output.to.files)ggsave(plot=logitP.plot, filename=paste(prefix,"-logitP.pdf",sep=""), height=6, width=10, units="in")
results$plots$logitP.plot <- logitP.plot

Expand Down
Loading

0 comments on commit 83e39b4

Please sign in to comment.