-
Notifications
You must be signed in to change notification settings - Fork 0
/
ss3sim_workshop.R
326 lines (295 loc) · 14.1 KB
/
ss3sim_workshop.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
### This file demonstrates the capabilities and workflow for the ss3sim
### stock assessment simulation framework.
### Contact the authors for any questions or
### issues. For more on the package see:
###
### Anderson, S.C., C.C. Monnahan, K.F. Johnson, K. Ono, and
### J.L. Valero. 2014. ss3sim: An R Package for Fisheries Stock Assessment
### Simulation with Stock Synthesis. Plos One 9:e92725.
### https://doi.org/10.1371/journal.pone.0092725
### ------------------------------------------------------------
### ------------------------------------------------------------
### [Step 0]
### Startup the working environment
## Set your working directory to be in the same folder as this script
getwd()
setwd(dir= )
## Load the necessary libraries
library(r4ss)
library(ss3sim)
library(foreach)
library(doParallel, quietly = TRUE, verbose = FALSE)
library(ggplot2)
## Setup parallel cores and iterations to run
Sys.getenv("NUMBER_OF_PROCESSORS")
cores <- 2 # cores for parallel
registerDoParallel(cores)
## Some checks before proceeding
packageVersion("ss3sim") # should be 0.9.5.9000 (GitHub) or 0.9.6 (CRAN)
if(as.numeric(substr(packageVersion("r4ss"), 1, 4)) < 1.27) stop("r4ss should",
" be version 1.27.0 or greater")
Sys.info()[5] # machine needs to be 64bit
## Check that a new enough R version is being used. R>= 3.3
if(version$major != "3" | as.numeric(version$minor) < 3)
stop('R version needs to be > 3.3')
### End of [Step 0]
### ------------------------------------------------------------
### ------------------------------------------------------------
### [Step 1]
### Run a simple example of a scenario to demonstrate input syntax,
### output structure, and results.
### This is our 'cod' model with data-rich sampling history and
### a two-way trip fishing history.
### A full simulation for research purposes would have multiple scenarios.
## Tell ss3sim the specifications for the
## * operating model (OM),
## * sampling,
## * estimation model (EM).
## Case files (.txt files stored in a directory, here labeled 'cases')
## tell ss3sim what to do.
## Case files are linked with internal ss3sim functions.
## F == effort trajectory
## D == sampling protocol of index of abundance (index),
## length-composition data (lcomp), and age-composition data (agecomp).
## <Look at input folder structure.>
run_ss3sim(
iterations=1, # vector of iterations to run
scenarios='D0-F1-cod', # vector of scenarios
case_folder='cases', # folder containing case files
case_files=list( # linking case files to cases
F='F',
D=c('index','lcomp','agecomp')),
om_dir='model/om/', # location of the OM ss3 files
em_dir='model/em/') # location of the EM ss3 files
## Should see "Completed iterations: 1 for scenarios: D0-F1-cod" on console.
## <Look at the folder structure that was created.>
## We can also use r4ss to ## make standard plots.
## Plot the OM
r4ss::SS_plots(
replist=r4ss::SS_output(
'D0-F1-cod/1/om', # Specify the OM folder
forecast=FALSE, covar=FALSE,
NoCompOK=TRUE, ncols=250, printstats=FALSE, verbose=FALSE),
png=TRUE, uncertainty=FALSE, html=FALSE, verbose=FALSE)
## Plot the EM
r4ss::SS_plots(
replist=r4ss::SS_output(
'D0-F1-cod/1/em', # Specify the EM folder
forecast=FALSE, covar=FALSE,
NoCompOK=TRUE, ncols=250, printstats=FALSE, verbose=FALSE),
png=TRUE, uncertainty=FALSE, html=FALSE, verbose=FALSE)
dev.off()
## <Navigate to "D0-F1-cod/1/om/plots/catch9 harvest rate">
## <Navigate to "D0-F1-cod/1/em/plots/catch9 harvest rate">
## Now rerun with more replicates, in parallel (within scenario) this
## time. We recommend using serial execution during development since error
## messages in parallel can be more difficult to interpret.
## Specify arguments that will be recycled.
case_files <- list(F='F', D=c('index','lcomp','agecomp'))
om <- 'model/om/'; em <- 'model/em/'
run_ss3sim(
iterations=2:4, # Run iteration 2, 3, and 4 in parallel
scenarios='D0-F1-cod', # Same scenario as last time
case_folder='cases',
case_files=case_files,
om_dir=om,
em_dir=em,
parallel=TRUE, # Tells ss3sim to run things in parallel
parallel_iterations=TRUE) # Specifies iterations or scenarios
## Should see "Running iterations in parallel ..."
## Read in results of the runs.
## Use 'get_results_all', which uses r4ss to read in Report.sso files and
## write two .csv files for each scenario (stored in the scenario folders)
## and two .csv files combining the results from each scenario.
## Here, the files will be identical because we only have one scenario.
get_results_all(user='D0-F1-cod')
## <look at csv files>
results_sc_1 <- read.csv('ss3sim_scalar.csv') # Scalar quantities, e.g., M
results_ts_1 <- read.csv('ss3sim_ts.csv') # Time-series quantities, e.g., F
## If ss3sim is not working on your computer or you want to look at the
## results without running the models, open the stored csv files in
## the 'results' folder.
## results_sc_1 <- read.csv(file.path('results', 'results_sc_1.csv'))
## results_ts_1 <- read.csv(file.path('results', 'results_ts_1.csv'))
## Add columns of relative error
results_sc_1 <- calculate_re(results_sc_1, add=TRUE)
results_ts_1 <- calculate_re(results_ts_1, add=TRUE)
## <see the additional columns>
head(results_ts_1)
## ss3sim has a set of functions for plotting scalar and time-series output:
## e.g., "plot_ts_lines", which will plot lines for each iteration of any
## column you wish in "results_ts_1" over time.
args(plot_ts_lines)
?plot_ts_lines
plot_ts_lines(results_ts_1, y='SpawnBio_om', vert='D')
plot_ts_lines(results_ts_1, y='SpawnBio_re', vert='D', re=TRUE)
## We can also look at scalar values, which will plot points for each iteration
plot_scalar_points(results_sc_1, x='D', y='depletion_re', re=TRUE)
plot_scalar_boxplot(results_sc_1, x='D', y='SSB_MSY_re', re=TRUE)
## If you had multiple scenarios, you could look at the results by scenario
plot_scalar_boxplot(results_sc_1, x='D', y='SSB_MSY_re', re=TRUE,
horiz = "scenario") # Boxplot for each scenario stacked vertically
plot_scalar_boxplot(results_sc_1, x='D', y='SSB_MSY_re', re=TRUE,
horiz = "species") # life histories stacked vertically
## Print a list of scalar values, for which relative error was calculated.
names(results_sc_1)[grep('_re', x=names(results_sc_1))]
### End of [Step 1]
### ------------------------------------------------------------
### ------------------------------------------------------------
### [Step 2].
## Create a new data case.
## Create three new text files manually,
## run the new scenario, and
## compare performance with the previous scenario from Step 1, "D0-F1-cod".
##
## The data case is referenced with letter D. Associated with this case are
## three files:
case_files$D
## Each case file contains arguments that are passed to sampling
## functions within ss3sim:
## an index of abundance (or CPUE),
## length compositions (lcomp), and
## age compositions (agecomp).
## ss3sim reads these text files in and parses the text to R.
## All of the arguments supplied in the text files are stored in a list
## within R and ss3sim passes the arguments to functions during the simulation
## at the appropriate time (e.g., F arguments before the OM runs).
## Internally, the parsing takes place with functions like the following:
caseargs <- get_caseargs(folder='cases', scenario='D0-F1-cod', case_files=case_files)
caseargs$F
## Plot the fishing trajectory passed to the OM.
plot(fvals~years, data=caseargs$F,
type='l', ylab=expression(italic('F')), las = 1)
## Here are the agecomp case arguments:
(ageargs <- caseargs$agecomp)
## The arguments supplied within the text files specific to the D case
## lead to sampling of data generated by the OM.
## ss3sim passes these arguments after the OM is completed, but before the
## EM is ran. There are three case files associated with the arguments
## specific to sampling (agecomp0-cod.txt, lcomp0-cod.txt, and index0-cod.txt).
## For a given scenario, all D-case text files need to have the same number,
## whereas other cases contain a single text file.
##
## For example,
## (1) ss3sim extracts the expected data from the OM
## (2) ss3sim uses r4ss to read this data (e.g., 'data.ss_new') into memory
## (3) ss3sim samples from the data leading to data with observation error
## and writes it to the disk
ss3sim:::extract_expected_data('om_data.ss_new', data_out='expected.dat')
dat <- r4ss::SS_readdat(file='expected.dat', verbose=FALSE)
sample_agecomp(dat_list=dat, outfile='sampled.dat',
fleets=ageargs$fleets,
Nsamp=ageargs$Nsamp,
years=ageargs$years,
cpar=ageargs$cpar)
## <Look at differences between in text editor>.
r4ss::SS_writedat(dat, outfile = "expected_formatted.dat", verbose = FALSE)
## ===== EXERCISE: CREATE A NEW CASE 'D1' THAT HAS LESS DATA =====.
## [1] save a copy of agecomp0-cod.txt as agecomp1-cod.txt. Then reduce
## sample sizes from 125 for fleet 1 (the fishery) to 25 and from 500 to
## 100 for fleet 2 (the survey).
## [2] Repeat with the length comps.
## [3] Lastly, we need index1-cod too. Create this and and double the value
## for sds_obs to 0.4 (sd in log space of the index).
## Now make sure these can be parsed correctly:
caseargs <- get_caseargs(folder='cases', scenario='D1-F1-cod', case_files=case_files)
caseargs$agecomp
caseargs$lcomp
caseargs$index
## Now run the new scenario, "D1-F1-cod", where
## D0 was replaced with D1, so ss3sim uses the files you created.
run_ss3sim(
iterations=1:cores, # run iterations in parallel
scenarios='D1-F1-cod', # run the new scenario
case_folder='cases', case_files=case_files, om_dir=om, em_dir=em,
parallel=TRUE, parallel_iterations=TRUE)
## Compare D0 and D1 by reading in the ss3 files using
## ss3sim::get_results_all, which will create additional csv files for
## the new scenario and combine them with the old csv file for the original
## scenario.
get_results_all(user=c('D0-F1-cod', 'D1-F1-cod'))
results_sc_2 <- calculate_re(read.csv('ss3sim_scalar.csv'), add=TRUE)
results_ts_2 <- calculate_re(read.csv('ss3sim_ts.csv'), add=TRUE)
## In case the simulation did not work for you uncomment the following
## two lines to read in the stored result files.
## results_sc_2 <- read.csv('results/results_sc_2.csv')
## results_ts_2 <- read.csv('results/results_ts_2.csv')
plot_scalar_boxplot(results_sc_2, x='D', y='depletion_re', re=TRUE)
plot_scalar_boxplot(results_sc_2, x='D', y='SSB_MSY_re', re=TRUE)
plot_scalar_boxplot(results_sc_2, x='D', y='SizeSel_1P_1_Fishery_re', re=TRUE)
plot_ts_lines(results_ts_2, y='SpawnBio_re', vert='D', re=TRUE)
## Note: Each replicate number always has the same recdevs so the scenarios
## are more comparable:
plot_ts_lines(subset(results_ts_2, replicate==1), y='SpawnBio_om', vert='D')
### End of [Step 2]
### ------------------------------------------------------------
### ------------------------------------------------------------
### [Step 3].
### Explore the interaction of process error and data weighting.
## In addition to the sampling functions, there are a variety of other
## functions that manipulate the models during runtime. For instance
## 'change_e' is used to change starting parameters and phases for
## parameters in the EM.
args(change_e)
?change_e
## We will use it to turn on and off estimation of M and steepness. Growth
## parameters will also be turned off (and fixed at their true values).
E.cases <- c(0,1,2)
E.df <- data.frame(
E=paste0('E', E.cases),
estimated=factor(c("M & h fixed", "h estimated", "M estimated"),
levels=c("M & h fixed", "h estimated", "M estimated")))
E.df
## <look at E0, E1 and E2 in extra_cases>
## 'change_tv' adds time-varying (tv) changes to OM parameters using
## environmental deviations. This function is very useful for adding
## process error in the OM.
args(change_tv)
?change_tv
## We will use it to add a trend to a selex parameter
selex.scalar.vec <- c(0,20)
S.cases <- seq_along(selex.scalar.vec)
S.df <- data.frame(selex=factor(c("No Process Error", "Process Error")),
S=paste0('S', S.cases))
S.df
## <look at S0 and S1>
## The sampling functions have an optional argument for the effective
## sample size (ESS). Here we specify it incorrectly to explore different
## data weightings on the age and length compositions.
ESS.scalar.vec <- sort(unique(c(1,exp(seq(log(.1),log(10), len=10)))))
D.cases <- seq_along(ESS.scalar.vec)
D.df <- data.frame(ess.ratio=ESS.scalar.vec, D=paste0('D', D.cases))
D.df
## <look at lcomp11-cod.txt>
## The case files are in the 'extra_cases' folder and ready to go. Notice
## that we've added new letters 'S' and 'E' to our case files. The package will now
## look for files named "S" to match up to our S cases (S0 and S1).
case_folder <- file.path('cases', 'extra_cases')
case_files <- list(F="F", D=c("index","lcomp","agecomp"), S='S', E='E')
scenarios <- expand_scenarios(cases=list(D=D.cases, F=1, S=S.cases, E=E.cases),
species='cod')
scenarios
## run_ss3sim(iterations=1:50, scenarios=scenarios, parallel=TRUE,
## parallel_iterations=TRUE, case_folder=case_folder, om_dir=om,
## em_dir=em, case_files=case_files)
## This simulation takes too long to run ,but here are the results,
## processed a bit
results_sc_3 <- readRDS(file.path('results', 'results_sc_3.Rdata'))
results_ts_3 <- readRDS(file.path('results', 'results_ts_3.Rdata'))
## Look at some performance measures
myylim <- ylim(-1,1)
g <- ggplot(results_sc_3, aes(ess.ratio, SSB_MSY_re, group=replicate))+
geom_line(alpha=.5) + facet_grid(estimated~selex.scalar) +
geom_vline(xintercept=1, col='blue') +
geom_hline(yintercept=0, col='red') + myylim+ theme_bw()
g
ggsave(file.path('plots', 'SSB_MSY_re.png'), g, width=9, height=6)
g <- ggplot(results_sc_3, aes(ess.ratio, depletion_re, group=replicate))+
geom_line(alpha=.5) + facet_grid(estimated~selex.scalar) +
geom_vline(xintercept=1, col='blue') +
geom_hline(yintercept=0, col='red') + myylim+ theme_bw()
g
ggsave(file.path('plots', 'depletion_re.png'), g, width=9, height=6)
### [End of Step 3]
### ------------------------------------------------------------
### End of File