-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClass4_script_2019.R
163 lines (118 loc) · 7.41 KB
/
Class4_script_2019.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Script for Class 4
# PLS 298 F2019
# Andrew Latimer
library(bbmle) # Ben Bolker's library of mle functions
library(emdbook) # contains the myxomitosis data set we will use
library(MASS); library(lattice)
#### PART 1: To work through together as a class with projector ####
# Implementing Bolker's tadpole example of a binomial likelihood
tadpole <- read.table("tadpole.dat")
head(tadpole)
str(tadpole)
# This data set contains results of trials of tadpoles exposed to predators.
# The 2 relevant columns are: Kill = number killed, and Exposed = number in the trial.
# It's always useful to look at the data.
barplot(tadpole$Kill/tadpole$Exposed, ylim=c(0, 1), xlab="Trials", ylab="Proportion killed")
# Define the negative log-likelihood for the binomial distribution.
# As in the reading, the binomial likelihood in R is:
# dbinom(prob=p, k, size=N) Where k is the vector of successes observed,
# N is the number of trials, and p is probability of success, the parameter we're estimating.
binomNLL1 <- function(p, k, N) {
return(-sum(dbinom(prob=p, k, size=N, log=T)))
}
# Q why do you think we want to work on the log scale right away?
# because we know we are interested in the log likelihood, and this is mathematically simpler
# Alternatively, we could calculate the individual likelihood values and multiply, then log-transform the result.
# We can quickly test to make sure this function is working by supplying a few values for p
binomNLL1(0.5, tadpole$Kill, tadpole$Exposed) # this p is obviously an overestimate since max kills is 5
binomNLL1(0.1, tadpole$Kill, tadpole$Exposed) # this might be closer to correct
# Comparing these two results, does the number change in the direction you expect? What does this indicate?
# prob of being killed is closer to 0.5 than to 0.1
# The basic R function for optimizing the parameters of a function is optim()
# We can use it to fit the binomial likelihood to data
opt.out <- optimize(f=binomNLL1, interval=c(0,1), N=tadpole$Exposed, k=tadpole$Kill)
# Compare to what we know is the actual ML estimate:
sum(tadpole$Kill)/sum(tadpole$Exposed) # 0.106
opt.out # 26.847, MLE = 0.106
# We could alternatively use Ben Bolker's user-friendlier ML function mle2()
# For the binomial likelihood, it already knows the likelihood function:
mle.out <- mle2(Kill~dbinom(prob=p, size=10), data=tadpole, start=list(p=0.5))
mle.out # 26.85 NLL, coeff p = 0.106
# The reason to work with log-likelihoods becomes clear if we look at the actual value of the likelihood here:
logLik(mle.out)
exp(-26.85)
# How would we interpret this probability? Does it matter that it's low? Relative to what?
# it's very small.
#### PART 2: Working in groups ####
# Implementing Bolker's myxomitosis example of a gamma likelihood
data(MyxoTiter_sum)
# simplify the data set to just one strain ("grade") of virus
myxdat <- subset(MyxoTiter_sum, grade==1)
# define the Gamma negative log-likelihood
gammaNLL1 <- function(shape, scale) {
return(-sum(dgamma(myxdat$titer, shape=shape, scale=scale, log=T)))
}
# We can look up the gamma distribution and see what its moments are to get starting values
shape.start <- mean(myxdat$titer)^2 / var(myxdat$titer)
scale.start <- var(myxdat$titer) / mean(myxdat$titer)
m1 <- mle2(gammaNLL1, start=list(shape=shape.start, scale=scale.start), trace=T)
# Would the normal distribution be a better fit for these data?
# we could fit the normal and use AIC to compare.
m2 <- mle2(titer~dnorm(mean=mu, sd=sigma), data=myxdat, start=list(mu=10, sigma=1))
# Or it's more straightforward to use this function from the MASS library:
#m2 = fitdistr(myxdat$titer, densfun="normal")
AIC(m1, m2) # Which distribution is better according to this criterion?
# normal is better (by 3 AIC points; usually a diff of 2 is considered enough)
# Does the penalty for number of parameters matter here?
# no, same penalty
# We might be interested in the shape of the log-likelihood around the MLEs.
# For a one-dimensional likelihood that's simple.
# For a 2-dimensional likelihood we need to do a conditional slice through the bivariate surface.
# Ben Bolker wrote some functions that make this easier if we have used his mle2() function to fit the likelihood.
s1 <- slice(m1,verbose=FALSE) # this takes a "slice" through the negative LL surface, setting the other parameters to their MLE values.
plot(s1)
# We might also want to see a more general picture of the likelihood "landscape". To get that, we can evaluate the likelihood function across a grid of points, then use a 3d plotting function to display it.
# Set up the values for the x and y axes
x.shape <- seq(20, 80, by=0.5)
x.scale <- seq(0.1, 0.15, by=0.0025)
x.grid <- expand.grid(x.shape, x.scale)
names(x.grid) <- c("shape","scale")
head(x.grid) # check what this data fram looks like
z <- rep(0, dim(x.grid)[1])
for (i in 1:dim(x.grid)[1]) z[i] <- gammaNLL1(x.grid[i,1], x.grid[i,2])
z.mat <- matrix(z, nrow=length(x.shape),byrow=FALSE)
persp(x.shape, x.scale, z.mat, theta=10, phi=10, col="light gray")
#### PART 3: Optional exercise to work through on your own ####
# Fitting a more complex likelihood
data(ReedfrogFuncresp)
# We can go back to the question of predation on tadpoles, but this time, instead of just fitting a distribution, we can ask whether there is a dependence of predation risk on the density of tadpoles.
# Bolker demonstrates one way to do this: assume that predators respond to density initially by killing more tadpoles, but that they have a maximum predation rate per unit time, so that the per capita risk to tadpoles declines with increasing density.
# In this model, there is a "handling time" (parameter h) that each predator needs to process each prey item, so the predators have a maximum predation rate regardless of search time. You could choose different predator response functions but let's follow Bolker and use the Holling Type II (i.e. hyperbolically decreasing risk).
# We're still using the binomial probability to model the data, but now Pr(Kill) is a function of prey density.
binomNLL2 <- function(params, p, N, k) {
a <- params[1]
h <- params[2]
predprob <- a/ (1 + a * h * N) # fill in the functional response here (answer: see Bolker p.183)
return(-sum(dbinom(prob=predprob, size=N, k, log = T))) # fill in the likelihood function here (ditto)
}
# Then fit the function
# Because passing the parameters is more complicated in this example, I'll give an example of how you could do it.
Initial <- as.vector(ReedfrogFuncresp$Initial)
Killed <- as.vector(ReedfrogFuncresp$Killed)
opt2 <- optim(fn=binomNLL2, par= c(a=0.5, h=0.02), N=Initial, k=Killed, control=list(trace=6))
# To look at the model fit we can compare to the data:
a.hat <- opt2$par[1] # MLE for a
h.hat <- opt2$par[2] # MLE for h
x.pred <- seq(1, 101,by=2)
predprob <- a.hat/(1+a.hat*h.hat*x.pred)
predprob.med <- qbinom(0.5, size=x.pred, prob=predprob)
predprob.lcl <- qbinom(0.025, size=x.pred, prob=predprob)
predprob.ucl <- qbinom(0.975, size=x.pred, prob=predprob)
plot(Killed~Initial, ReedfrogFuncresp, pch=16, col="green")
lines(x.pred, predprob.med)
lines(x.pred, predprob.lcl, lty=2)
lines(x.pred, predprob.ucl, lty=2)
# To see how good this model is, what could we compare it to?
# What are a couple ways you could compare it?
# for some discussion of the Holling type II function and alternatives, here's one discussion:
# http://www.math.unl.edu/~bdeng1/Teaching/math943/Topics/Stability%20of%20Ecosystems/Skal01.pdf