-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path00_functions.r
719 lines (562 loc) · 21.6 KB
/
00_functions.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
#' ---
#' title: "Helper functions for invertebrate analysis"
#' author: "Paul Czechowski"
#' date: "October 22nd, 2016"
#' output: pdf_document
#' toc: true
#' highlight: zenburn
#' bibliography: ./references.bib
#' ---
#'
#' # Preface
#'
#' This code is tested using a raw R terminal. Path names are defined relative
#' to the project directory. This code commentary is included
#' in the R code itself and can be rendered at any stage using
#' `rmarkdown::render ("./00_functions.r")`. Please check the
#' session info at the end of the document for further notes on the coding environment.
#'
#' <!-- #################################################################### -->
#'
#' # Functions for `phyloseq` objects
#'
#' ## Remove empty data
#'
#' This function removes "0" count phylotypes from samples and samples with "0"
#' phylotypes. Currently used as subroutine for `agglomerate()`.
remove_empty <- function(phsq_ob){
# package loading
require("phyloseq")
# filter Phylotypes
phsq_ob <- prune_taxa (taxa_sums (phsq_ob) > 0, phsq_ob)
# filter samples
phsq_ob <- prune_samples (sample_sums (phsq_ob) > 0, phsq_ob)
# return object
return (phsq_ob)
}
#' ## Agglomerate taxonomy
#'
#' To increase data density, rank based agglomeration may be desirable. This
#' function checks for the presence of the correct ranknames in passed in string
#' and then agglomerates.
agglomerate <- function (ps_ob, tax_rank) {
# package loading
require("phyloseq")
# check correct naming of taxonomy strings
stopifnot (tax_rank %in% rank_names (ps_ob))
# agglomerate taxa on requested level
ps_ob <- tax_glom (ps_ob, tax_rank)
# filter object
ps_ob <- remove_empty (ps_ob)
# return agglomerated object
return (ps_ob)
}
#' ## Reset presence absence values
#'
#' This calculates presence / absence within a given `phyloseq` object
#' on phylotypes. Only makes sense with agglomerated object.
make_binary <- function(phsq_ob){
# package loading
require("phyloseq")
# transform data to binary
otu_table (phsq_ob) [otu_table (phsq_ob) > 0] <-1
# return object
return (phsq_ob)
}
#' ## Inverse the `log` of CSS'd data
#'
#' CSS'd data is `log` scaled by default, which can be reverted using this
#' function. Zero counts become 1, which is not handled by this function,
#' but necessary if it is to be employed.
get_exp <- function(phsq_ob){
# package loading
require("phyloseq")
# inverse log scaling
phsq_ob <- transform_sample_counts (phsq_ob, function(OTU) exp(OTU))
# return object
return (phsq_ob)
}
#' ## Transposing phylotype counts
#'
#' Function for transposing phylotype count tables from Phyloseq objects to the
#' rest of the R world (i.e. `vegan`).
get_phylotypes <- function (ps_ob) {
# package loading
require("phyloseq")
OTU <- otu_table (ps_ob)
if (taxa_are_rows (OTU)) {
OTU <- t (OTU)
}
return (as (OTU, "matrix"))
}
#' ## Renaming phylotypes
#'
#' Function to rename phylotypes from identifier to specific taxon name.
rename_phylotypes <- function (spc, ps_ob, tax_rank) {
# package loading
require("phyloseq")
# check correct naming of taxonomy strings and matrix format of input matrix
stopifnot (tax_rank %in% rank_names (ps_ob) & is.matrix (spc))
# set names
colnames(spc) <- tax_table (ps_ob) [which (colnames (spc) %in%
rownames (tax_table (ps_ob))), tax_rank]
# return modified species matrix
return (spc)
}
#' ## Getting data frames of predictor variables
#'
#' Function to get selected predictor variables from `phyloseq` objects.
get_predictors <- function (ps_ob, vars) {
# package loading
require("phyloseq")
# for function writing / testing / debugging
# ps_ob <- phsq_ob_comb
# vars <- c ("AREA", "GENE")
# stop if passed in variables are not contained in the sample data
# of the phyloseq object, or if a phyloseq object wasn't passed in
stopifnot (all (vars %in% colnames (sample_data(ps_ob))))
stopifnot (class(ps_ob) == "phyloseq")
# generate a predictor data frame from selected columns
predictors <- data.frame (sample_data (ps_ob)[ , vars])
# return data frame
return (predictors)
}
#' ## Relative abundances
#'
#' This calculates relative abundances within a given `phyloseq` object
#' (Used for plotting).
make_proportional <- function(phsq_ob){
# package loading
require("phyloseq")
phsq_ob <- transform_sample_counts(phsq_ob, function(OTU) OTU / sum(OTU))
return (phsq_ob)
}
#' # Functions for `base` objects
#'
#' ## Re-setting presence absence counts
#'
#' Function for re-setting presence / absence counts after taxon agglomerating
set_presences <- function (spc) {
# check if data is matrix (perhaps not needed)
stopifnot ( is.matrix(spc))
# set values
spc [ spc > 0 ] <- 1
# return object
return (spc)
}
#' ## Matching predictor variables with phylotype data
#'
#' For some samples biological information is available, but no abiotic information
#' because collection of predictors sometimes wasn't possible. Such samples cannot
#' be included in some downstream analyses. In this function `complete.cases()` is run on
#' predictor variables alone.
cmpl_predictors <- function (predictors) {
# test the input data
stopifnot (class (predictors) == "data.frame")
# filtering
predictors_filtered <- predictors[ which (complete.cases(predictors)), ]
# return filtered object
return (predictors_filtered)
}
#' In this function the phylotype data is pruned to defined cases in a passed-in
#' data frame with predictors.
cmpl_phylotypes <- function (phylotypes, predictors) {
# for function development
# load (path_phsq_ob_comb)
# phylotypes <- get_phylotypes (agglomerate (phsq_ob_comb, "Class"))
# predictors <- cmpl_predictors (get_predictors (agglomerate (phsq_ob_comb,
# "Class"), vars = c("AMMN", "NITR", "PHOS", "POTA", "SLPH", "CARB","COND",
# "PHCC", "PHHO")))
# test the input data
stopifnot (class (phylotypes) == "matrix" & class (predictors) == "data.frame")
# remove samples from biotic observations if they are not matched by abiotic ones
phylotypes_filtered <- phylotypes[which ( rownames (phylotypes) %in%
rownames (predictors)), ]
# return filtered matrix
return (phylotypes_filtered)
}
#' ## Getting R-compatible data frames
#'
#' This function returns a matrix `spc` and a data frame `obs` from a passed-in
#' `phyloseq` object. It also returns vectors `loc` and `gen` (to have available
#' location groups and marker coverage for plots etc.).
get_list <- function (ps_ob, tax_rank, pred_cat, pres_abs = FALSE) {
# package loading
require("phyloseq")
# check correct naming of taxonomy strings and type of predictor category
stopifnot (tax_rank %in% rank_names (ps_ob) & is.character (pred_cat))
# taxon agglomeration
ps_ob <- agglomerate (ps_ob, tax_rank)
# data frame of soil geochemical variables
obs <- get_predictors (ps_ob, vars = pred_cat)
# matrix of biotic observations
spc <- get_phylotypes (ps_ob)
# EXPERIMENTAL: convert "0" to NAs, to get clear pattrern on defined values
# message ("EXPERIMENTAL: Substitution of values below detection limit with NA
# instead of '0'")
# obs[obs == 0] <- NA # replacing 0s with NAs
# get complete abiotic data
obs <- cmpl_predictors (obs)
# filter bio data respectively
spc <- cmpl_phylotypes (spc, obs)
# set presence / absence for species matrix
# if flag is set
if( pres_abs == TRUE ){ spc <- set_presences (spc)}
# set phylotype names
spc <- rename_phylotypes (spc, ps_ob, tax_rank)
# get groups for biplot (write as subroutine?)
grp <- c (sample_data (ps_ob)[ which (rownames (obs) %in% sample_names
(ps_ob)), ])$AREA
# get marker names
gen <- c (sample_data (ps_ob)[ which (rownames (obs) %in% sample_names
(ps_ob)), ])$GENE
# diagnostic message
message ("samples -- spc: ", nrow(spc), "; obs: ", nrow(obs) , "; grp's: ", length (grp),
"; gen's: ", length (gen))
# use data frames instead of vector to not loose the sample information
grp <- data.frame(grp, row.names = rownames(obs))
gen <- data.frame(gen, row.names = rownames(obs))
# create return list (add elements to the **END** if necessary)
return (list ("spc" = spc, "obs" = obs, "grp" = grp, "gen" = gen))
}
#' ## Matching PCA labels with input object
#'
#' This function adjust the location factor variable to match and input data
#' frame (`obs`) or matrix (`spc`).
shorten_groups <- function (grp, pca_df) {
# check input data - are the logicals correct in short form
stopifnot ( is.matrix(pca_df) || is.data.frame (pca_df) && is.data.frame (grp))
# shorten groups to match observations
grp <- grp[which (rownames (grp) %in% rownames (pca_df)) ,]
# diagnostic message
message ("Type conversion of group performed.")
# return vector
return (grp)
}
#' # Functions for pre-processing
#'
#' ## Remove outliers
#'
#' This function removes outliers from soil geochemical data.
remove_outliers <- function (obs) {
# package loading
library ("outliers")
# warning message
message ("This is only adequate for soil geochemical values.")
# print oilier count
out <- sum (outlier (as.matrix (obs), logical = TRUE), na.rm = TRUE )
inl <- length (outlier (as.matrix (obs), logical = TRUE))
message ("There are ", out, " outliers among ", inl, " values.")
# remove outliers
obs <- data.frame (rm.outlier (as.matrix (obs), fill = TRUE))
# return object
return (obs)
}
#' ## Centred log ratio transformation
#'
#' As described elsewhere [@Ranganathan2011], compositional data is best
#' transformed using centred log ratio, as implemented in package `rgr()`. This
#' will remove the closure effect of compositional data.
#' Use this function on compositional data only, such as the mineral
#' data. This function replaces `0` with `NA`s. Adjust the input object via
#' `get_list()` if too many samples with complete observations are lost. Check
#' the `summary()` ouput below to get the `NA` numbers.
transform_clr <- function (obs) {
# package loading
require ("rgr")
# print message for use case
message ("Use this function on compositional data only, such as the mineral
data. This function replaces '0' with 'NA's. Adjust the input object via
'get_list() if too few samples with complete observations are lost. Check
the 'summary()' ouput below to get the 'NA' numbers." )
# `clr()` of `rgr` needs a matrix
obs <- as.matrix(obs)
# undetected values need to be removed
obs [obs==0] <- NA
# undetected values have been removed
print (summary (obs))
# centred log ratio transformation [@Aitchison1986]
obs <- clr (obs, ifclose = TRUE)
# return transformed object
return (obs)
}
#' ## Transformations (Yeo-Johnson, Box-Cox, or none)
#'
#' This function performs Yeo-Johnson transformation [@yeo2000] (or others), centres, scales the data
#' and removes near zero variance values from the data. It's intended to be used
#' with continuous data (soil geochemical values), but will work on any data frame.
transform_any <- function (obs, method = c("center", "scale", "YeoJohnson", "nzv")) {
# package loading
require ("caret")
# train model
pp <- preProcess(obs, method = method)
# apply model
obs_trans <- predict(pp, newdata = obs)
# print transformed data
print(summary(obs_trans))
# return transformed data
return (obs_trans)
}
#' ## Removing co-correlated variables
#'
#' Intended for use with soil geochemical values, but will accept and data frame.
remove_cocorrelated <- function (obs) {
# package loading
require ("caret")
# Diagnostic message
message ("Correlation treshhold is hard-coded with '.75'.")
# Finding the co-correlations
high_cor_obs <- findCorrelation (cor (obs), cutoff = .75) # ident. highly. corr
# Dividing the co-correlation set
del_obs <- obs[, high_cor_obs]
flt_obs <- obs[,-high_cor_obs]
# Info
message ("Above '.75' (and removed): ", paste (names (del_obs), collapse=","))
message ("Below '.75' (and retained): ", paste (names (flt_obs), collapse=","))
# return observations without co-correlation
return (flt_obs)
}
#' # Functions for plotting
#'
#' ## Create bi-plot
#'
#' This function generates a simple PCA biplot of a `prcomp()` object, with
#' circles defined by a matching factor variable.
get_biplot <- function (pcs, grp) {
# package loading
require("ggbiplot")
bip <- ggbiplot (pcs, choices = 1:2, scale = 1, pc.biplot = TRUE, ellipse = TRUE,
varname.size = 4, groups = grp, circle = TRUE) + theme_bw() + theme (legend.title =
element_blank(), legend.background = element_rect (color = "gray90", size =
0.3, linetype= 1), legend.justification=c(1,1), legend.position=c(1,1))
return (bip)
}
#' ## Plot PCA variance
#'
#' Plot eigenvalues and percentages of variation of an ordination object
#' Kaiser rule and broken stick model Usage: `evplot(ev)` where ev is a vector
#' of eigenvalues. License: GPL-2 Author: Francois Gillet, 25 August 2012.
#' Average might be dragged way below 1 here, so this is is perhaps not
#' the perfect implementation. Not used anymore.
get_evplot <- function(ev) {
# Broken stick model (MacArthur 1957)
n <- length(ev)
bsm <- data.frame(j=seq(1:n), p=0)
bsm$p[1] <- 1/n
for (i in 2:n) bsm$p[i] <- bsm$p[i-1] + (1/(n + 1 - i))
bsm$p <- 100*bsm$p/n
# Plot eigenvalues and % of variation for each axis
op <- par(mfrow=c(2,1))
barplot(ev, main="Eigenvalues", col="bisque", las=2)
abline(h=mean(ev), col="red")
legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")
barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE,
main="% variation", col=c("bisque",2), las=2)
legend("topright", c("% eigenvalue", "Broken stick model"),
pch=15, col=c("bisque",2), bty="n")
par(op)
}
#' An improved version of the the function above written with `ggplot2`.
#' Author: Ben Rohrlach, 7 August 2016. Used instead of `get_evplot`.
plot_pcvars <- function (pcs, plotScreen = FALSE){
# package loading
require(ggplot2)
variances <- pcs$sdev^2
PCs <- 1:length(variances)
gp <- ggplot () + xlab ('\nPrincipal Component') + ylab ('Variances\n') +
geom_line (aes (x = PCs, y = variances), size = 1) +
geom_point (aes (x = PCs, y = variances), size = 5) +
geom_point (aes (x = PCs, y = variances), size = 4, col='white') +
scale_x_continuous (labels = PCs, breaks = PCs) +
coord_cartesian ( ylim = c (0, max (variances)), xlim = range (PCs))
if (plotScreen){
gp
}
return(gp)
}
#' ## Get `corrplots()`
#'
#' Simple function to get `corrplots()`; intended for use with soil geochemical values,
#' but should work with any other object as well. Not used anymore.
get_corrplots <- function (obs, flt_obs) {
# message
message ("ggcotplot() will work better here")
# package loading
require("corrplot")
# create the plots
p1 <- corrplot.mixed (cor (obs), tl.pos="d")
p2 <- corrplot.mixed (cor (flt_obs), tl.pos="d")
# return list with plots
return (list ( "obs" = obs, "flt_obs" = flt_obs))
}
#' ## Map phyloseq object
#'
#' The second function draws the sample map. Here I use package `phylogeo`, which
#' itself seems to be more of a helper to negotiate functionality between `ggplot2`
#' and `phyloseq`. Syntax options are limited, but it does the job.
#'
#' * `phylogeo` needs correct names of long / lat variables in the `sample_data()`
#' component of the `phyloseq` objects.
#' * Instead of centering the projection on the South Pole, it's centering is defined
#' by the range of the sample coordinates of the respective 'phyloseq` object.
#' * The plotting objects are returned and evaluated later.
#'
#' Draw a map with the sampling locations, incl. altering the names of the input
#' phyloseq object.
map_samples <- function(phsq_ob, col_long = NULL, col_lat = NULL, subtitle = NULL){
require (phylogeo)
# check input data
stopifnot (class (phsq_ob) == "phyloseq")
stopifnot (is.null (col_long) == FALSE)
stopifnot (is.null (col_lat) == FALSE)
# rename lat long columns in input phyloseq object to be recognized by
# phylogeo - veriable expectations are hard coded in phylogeo
names (sample_data(phsq_ob))[names (sample_data(phsq_ob)) == col_lat] <-
"Latitude"
names (sample_data(phsq_ob))[names (sample_data(phsq_ob)) == col_long] <-
"Longitude"
# set latitude and longitude for projection centring
latitude <- mean (range(sample_data (phsq_ob)$Latitude))
longitude <- mean (range(sample_data (phsq_ob)$Longitude))
# define title and subtitle
plot.title <- "Sample Range"
plot.subtitle <- subtitle
# draw a map, adjust coordinates and give a title, subtitle
map <- map_phyloseq (phsq_ob, region="Antarctica", color="AREA",
jitter=TRUE, jitter.x=2,jitter.y=2,) +
coord_map( projection = "ortho", orientation = c(latitude, longitude, 0)) +
ggtitle(bquote(atop(.(plot.title), atop(italic(.(plot.subtitle)), "")))) +
geom_path()
# return object
return (map)
}
#' ## Plot `phyloseq` object
#'
#' A simple barplot function to get an overview over the contents of `Phyloseq`
#' objects.:
barplot_samples <- function(ps_ob, tax_rank){
# package loading
require ("phyloseq")
require ("ggplot2")
# check correct naming of taxonomy strings
stopifnot (tax_rank %in% rank_names (ps_ob))
# define title and subtitle
plot.title <- "Invertebrate Composition"
plot.subtitle <- "18S rDNA CSS'd read counts (upper) or rank presence (lower) "
# draw a barplot, and give a title, subtitle
# re-set "facet_grid" if desirable, doesn't make much sense here
ps_bar <- plot_bar(ps_ob, fill = tax_rank) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 5)) +
theme(legend.position="left") +
ggtitle(bquote(atop(.(plot.title), atop(italic(.(plot.subtitle)), ""))))
# return object
return (ps_bar)
}
#' ## Plot regressions
#'
#' Returns regression lines. X axis is hard coded `"MNAGE"` to match input dataframe
#' `age_df`, function will loop over matching data frane column names that are
#' passed in with `y_axis`. No error input checking implemented, use wisely.
plot_regrs = function (y_axis, age_df){
# package loading
require("ggplot2")
# get the regression plot
regr_plot <- ggplot(age_df, aes_string(x= "MNAGE", y = y_axis)) +
geom_point(colour = 'red', size = 3) +
geom_smooth(method=lm) +
ylab(y_axis) +
xlab("Substrate Age") +
theme_bw() +
theme(axis.text.x = element_text(angle = 65, hjust = 1,
size = 8), axis.text.y = element_text (size = 8))
# return plot
return (regr_plot)
}
#' ## Functions for violin plots
#'
#' ### Add grouping factor for discrete X / continuous Y plots
#'
#' This is a "band-aid" function returning an data frame with a factor variable
#' if plotting by a grouping variable is desired, which is not (anymore) contained
#' in the data to be plotted. Factor data frame `dfr_x` is `merge()`d with
#' multivariate data frame `dfr_v`.
add_discretex <- function (dfr_v, dfr_x, dfr_x_name = NULL) {
# check input data to match function requirements
stopifnot (dfr_x_name != NULL)
stopifnot (is.data.frame (dfr_v) == TRUE)
stopifnot (is.data.frame (dfr_x) == TRUE)
stopifnot (ncol (dfr_x) == 1)
# naming discrete x factor
names(dfr_x) <- dfr_x_name
# merging data frame
dfr_v <- merge (dfr_v, dfr_x, by = "row.names", all = TRUE)
# cleaning data frame
dfr_v <- dfr_v [complete.cases (dfr_v), ]
rownames (dfr_v) <- dfr_v$Row.names
dfr_v$Row.names <- NULL
# return data frame
return (dfr_v)
}
#' ### Create data frames for `get_violin()`
#'
#' Create two-column data frame for `get_violin()`. Returns one two-column
#' data frame from input arguments.
get_viodf <- function (dfr_y_name, dfr, dfr_x_name){
# create two-column data frame
dfr <- dfr [ , c( dfr_y_name , dfr_x_name)]
# return two-column data frame
return (dfr)
}
#' ### Generate violin plots
#'
#' Generate violin plots from one discrete X and one continuous Y. Takes one
#' two column data frame, returns one `ggplot2` object
get_violin <- function (dfr, dfr_x_name = NULL){
# checking input data
stopifnot (is.data.frame (dfr) == TRUE)
stopifnot (dfr_x_name != NULL)
stopifnot (ncol (dfr) == 2)
stopifnot (any (names (dfr) %in% dfr_x_name))
stopifnot (length (dfr_x_name) == 1)
# build the violin
violin <- ggplot (dfr, aes_string (x = dfr_x_name, y = names(dfr) [which (names (dfr) != dfr_x_name)],
color = dfr_x_name)) +
geom_violin (scale = "area") +
stat_summary (fun.data = mean_sdl, geom = "pointrange") +
theme(legend.position='none')
# return the violin
return (violin)
}
#' ### Generate list of violin plot graphical objects
#'
#' Generate violin plots from one data frame and and on factor to by plotting by
#' Returns list of `ggplot2` objects. Requires `get_viodf()` and `get_violin()`.
get_violinplotlist <- function (dfr, dfr_x_name = NULL) {
# for function building
# dfr <- obs
# dfr_x_name <- "AREA"
# checking input data
stopifnot (is.data.frame (dfr) == TRUE)
stopifnot (dfr_x_name != NULL)
stopifnot (any (names (dfr) %in% dfr_x_name))
stopifnot (length (dfr_x_name) == 1)
# create list of two-variable data frames
# one of which is the variable to be plotted, the other the factor to group
# by, all to be passed to get_violin()
dfr <- lapply ( names(dfr) [which (names (dfr) != dfr_x_name)],
get_viodf, dfr, dfr_x_name )
# generate violin plots from list of data frames
groblist <- lapply (dfr, get_violin, dfr_x_name)
# return list of violin plots
return (groblist)
}
#' <!-- #################################################################### -->
#'
#' # Session info
#'
#' The code and output in this document were tested and generated in the
#' following computing environment:
#+ echo=FALSE
sessionInfo()
#' # References