-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup.Rmd
226 lines (207 loc) · 7.87 KB
/
setup.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
---
title: "Generic set-up for all Rmds"
author: "EE Holmes"
output:
pdf_document: default
html_document: default
---
```{r echo=FALSE}
# This file is called by all Rmds that make tables and figures
# The data frames spawners0 (Jul-Sep catch), nonspawners0 (Oct-Mar), and dat.chl (chl analyses)
# are created here and used throughout the paper
```
```{r message=FALSE, warning=FALSE, echo=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(SardineForecast)
library(stringr)
library(mgcv)
library(knitr)
# If this is being called from another Rmd (as a child), this counter will exist
if (!exists(".rmdenvir")) .rmdenvir <- environment()
# detect if making latex
knitOut <- knitr::opts_knit$get("rmarkdown.pandoc.to")
if (length(knitOut) == 0) isLatex <- FALSE else isLatex <- (knitOut == "latex")
```
```{r message=FALSE, warning=FALSE, echo=FALSE}
Qtr <- 3 # start of season
reg <- "Kerala"
# The Qtr 3 catch is called spawners0
respdat <- data.frame(
spawners0 = oilsardine_qtr[[reg]][oilsardine_qtr$Qtr == 3]
)
# This is a completely obtuse way to add the Qtr 4 t and Qtr 1 t+1 time-series!!
# The [seq(6,dim(oilsardine_qtr)[1],4)] bit at the end is what selects the right values
CatchWin <- stats::filter(oilsardine_qtr[[reg]], c(0, 1, 1, 0), sides = 1)[seq(6, dim(oilsardine_qtr)[1], 4)]
respdat$CatchWin0 <- c(CatchWin, NA)
# add Oct-Mar Catch = qtr 4 t plus qtr 1 t+1; Call this nspawners0
respdat$nspawners0 <- respdat$CatchWin0
respdat <- log(respdat)
respdat$Year <- oilsardine_qtr$Year[oilsardine_qtr$Qtr == Qtr]
# make sure the lengths match
respdat <- respdat[respdat$Year <= max(seio_covariates_mon$Year), ]
```
```{r message=FALSE, warning=FALSE, echo=FALSE}
# This part selects the covariates to use
# There are many more. See colnames(seio_covariates_mon). See the SardineForecast-1.11-paper.tar.gz file
# in the data folder for the code that prepared all the variables.
covnames <- c(
paste0("log.CHL.", 1:13),
paste0("SST.UPW.", 1:5),
paste0("SST.", 1:13),
paste0("SSTICOAD.", 1:13),
"precip.gpcp.kerala", "Precip.Kerala",
"ONI", "DMI", "PDO", "AMO", "EMTperp.UPW", "Wetip.UPW", "Weperp.UPW",
"Bakun.UPW"
)
# This part is specifying the months to average over
covmon <- list(
7:9, 1:3, 10:12, 9:10, 6:9, 1:6, 7:12,
1:12, 3:5, 4:6, 1:5, 4:5, 6:7,
9:11, 8, 9, 7:8, 1:2
)
# Make a copy so I don't trash seio_covariates_mon by accident
seio_covariates_mon2 <- seio_covariates_mon
# This part is creating lagged covariates
n <- 2 # max lag in cov to use
for (i in 1:length(covnames)) {
covname <- covnames[i]
for (j in 1:length(covmon)) {
mon <- covmon[[j]]
# this part gives the variable a useful name with the months averaged info
varname <- paste(covname, ".mon", mon[1], "to", mon[length(mon)], ".0", sep = "")
# this line does the monthly averaging
covdat <- tapply(seio_covariates_mon2[[covname]], seio_covariates_mon2$Year, function(x) {
mean(x[mon], na.rm = TRUE)
})
covdat[is.infinite(covdat)] <- NA
respdat[[varname]] <- covdat
}
}
# This part computes the mean across boxes. 2:5 are the coastal boxes and 2:5 plus 7:10 is the regional
for (bx in list(c(2:5), c(2:5, 7:10))) {
for (j in c("mon1to3.0", "mon9to10.0", "mon6to9.0", "mon1to12.0", "mon3to5.0", "mon10to12.0", "mon7to9.0")) {
for (jj in c("SST.", "log.CHL.", "SSTICOAD.")) {
varname <- c(paste0(jj, bx, ".", j))
tmp <- apply(respdat[, varname], 1, mean, na.rm = TRUE)
varname <- paste0(jj, bx[1], ".", bx[NROW(bx)], ".", j)
respdat[[varname]] <- tmp
}
}
}
# add lags
for (i in colnames(respdat)) {
name <- paste(str_sub(i, 1, str_length(i) - 1), "1", sep = "")
respdat[[name]] <- c(NA, respdat[[i]][1:(dim(respdat)[1] - 1)])
name <- paste(str_sub(i, 1, str_length(i) - 1), "2", sep = "")
respdat[[name]] <- c(NA, NA, respdat[[i]][1:(dim(respdat)[1] - 2)])
}
# make 2.5 year runsum of regional SST
for (varn in c("SST.", "DMI", "Precip.Kerala", "SSTICOAD.")) {
# 2.5 is nearshore; 2.10 is regional
if (varn == "SST." | varn == "SSTICOAD.") ivals <- c("2.5", "2.10")
if (varn == "DMI" | varn == "Precip.Kerala") ivals <- c("")
for (i in ivals) {
tmp <- (respdat[, paste0(varn, i, ".mon1to12.0")] * 12 -
respdat[, paste0(varn, i, ".mon10to12.0")] * 3 -
respdat[, paste0(varn, i, ".mon7to9.0")] * 3) +
respdat[, paste0(varn, i, ".mon1to12.1")] * 12 +
respdat[, paste0(varn, i, ".mon1to12.2")] * 12
varname <- paste0(varn, i, ".3.yr.runsum.0")
respdat[[varname]] <- tmp / (12 + 12 + 6)
# create the lag 1
for (ii in paste0(varn, i, ".3.yr.runsum.0")) {
name <- paste(str_sub(ii, 1, str_length(ii) - 1), "1", sep = "")
respdat[[name]] <- c(NA, respdat[[ii]][1:(dim(respdat)[1] - 1)])
name <- paste(str_sub(ii, 1, str_length(ii) - 1), "2", sep = "")
respdat[[name]] <- c(NA, NA, respdat[[ii]][1:(dim(respdat)[1] - 2)])
}
}
}
# make PDO, AMO, ONI and DMI indices
for (varn in c("AMO", "PDO", "ONI", "DMI")) {
tmp <- (respdat[, paste0(varn, ".mon1to6.0")] * 6 +
respdat[, paste0(varn, ".mon7to12.1")] * 6)
varname <- paste0(varn, ".mon7to6.0")
respdat[[varname]] <- tmp / (12)
# create the lag 1 and 2
for (ii in paste0(varn, ".mon7to6.0")) {
name <- paste(str_sub(ii, 1, str_length(ii) - 1), "1", sep = "")
respdat[[name]] <- c(NA, respdat[[ii]][1:(dim(respdat)[1] - 1)])
name <- paste(str_sub(ii, 1, str_length(ii) - 1), "2", sep = "")
respdat[[name]] <- c(NA, NA, respdat[[ii]][1:(dim(respdat)[1] - 2)])
}
}
fullrespdat <- respdat
```
```{r message=FALSE,warning=FALSE, echo=FALSE}
# The code above makes all the variables but also makes many more than needed
# Prune down to the needed ones
runsumnames <- colnames(fullrespdat)[stringr::str_detect(colnames(fullrespdat), "runsum")]
runsumnames <- unique(sapply(runsumnames, function(x) {
str_split(x, "runsum")[[1]][1]
}))
runsumnames <- paste0(runsumnames, "runsum.")
covnames <- c(
"precip.gpcp.kerala.mon6to7.",
"precip.gpcp.kerala.mon4to5.",
"Precip.Kerala.mon6to7.",
"Precip.Kerala.mon4to5.",
"SST.UPW.4.mon6to9.",
"SST.UPW.4.mon10to12.",
"EMTperp.UPW.mon6to9.",
"EMTperp.UPW.mon4to5.",
"Wetip.UPW.mon1to2.",
"Wetip.UPW.mon6to9.",
"Weperp.UPW.mon6to9.",
"Bakun.UPW.mon6to9.",
"SST.2.5.mon6to9.",
"SST.2.10.mon3to5.",
"SST.2.5.mon10to12.",
"ONI.mon7to6.",
"PDO.mon7to6.",
"AMO.mon7to6.",
"AMO.mon1to12.",
"DMI.mon9to11.",
"DMI.mon6to9.", runsumnames
)
```
```{r message=FALSE,warning=FALSE, echo=FALSE}
# set up to use consistent data
# I set up dat.spawners sep since that gives me 2015 for spawners0; that is NA for nspawners0
dat <- fullrespdat[, c(
"Year", "nspawners1", "nspawners2", "spawners0", "spawners1", "spawners2",
paste(covnames, "0", sep = ""), paste(covnames, "1", sep = "")
)]
dat.spawners <- dat
if (any(diff(dat.spawners$Year) != 1)) stop("Problem in setup.Rmd. The covariate data.frame cannot have any missing years.")
```
```{r message=FALSE,warning=FALSE, echo=FALSE}
# set up to use consistent data
dat <- fullrespdat[, c(
"Year", "nspawners0", "nspawners1", "nspawners2",
"spawners0", "spawners1", "spawners2",
paste(covnames, "0", sep = ""),
paste(covnames, "1", sep = "")
)]
dat.nonspawners <- dat
if (any(diff(dat.nonspawners$Year) != 1)) {
stop("Problem in setup.Rmd. The covariate data.frame cannot have any missing years.")
}
rm(dat) # make sure I don't accidentally forget to define dat
```
```{r message=FALSE,warning=FALSE, echo=FALSE}
covnames.chl <- c(
"log.CHL.2.5.mon1to3.", "log.CHL.2.5.mon7to9.", "log.CHL.2.5.mon10to12.",
covnames
)
# set up to use consistent data
dat <- fullrespdat[, c(
"Year", "nspawners0", "nspawners1", "nspawners2", "spawners0", "spawners1", "spawners2",
paste(covnames.chl, "0", sep = ""), paste(covnames.chl, "1", sep = "")
)]
dat.chl <- dat
if (any(diff(dat.chl$Year) != 1)) {
stop("Problem in setup.Rmd. The covariate data.frame cannot have any missing years.")
}
rm(dat) # make sure I don't accidentally forget to define dat
```