-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmixed_aov_v3_universalized.R
496 lines (427 loc) · 20.7 KB
/
mixed_aov_v3_universalized.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
# Library:
library(tidyverse)
library(dplyr)
library(magrittr)
library(ggrepel)
library(ggpubr)
library(rstatix)
library(readxl)
library(scales)
# USER -- Enter file path:
file_path <- "C:/Users/siddh/R/Projects/hourly_data/P19_10days_merged_Clean.Rda"
# If Using an RDA file which needs further specification (ex. 1 file of 3 in file path) please specify EXACT RDA file name below:
rda_name <- "df.hourly"
# USER -- run below line to create function to load file
load_file <- function(file_path) {
cat("\nPlease select file type:\n")
cat("1. CSV\n")
cat("2. Excel\n")
cat("3. Rda File\n")
filetype <- as.integer(trimws(readline("Enter your choice (1, 2, or 3): ")))
if (filetype == 1) {
data_new <- read_csv(file_path)
}
if (filetype == 2) {
data_new <- read_excel(file_path)
}
if (filetype == 3) {
load(file_path, envir = globalenv())
cat("\nDoes your Rda file need to be further specified? Please note that if your answer is yes, you must specify the rda file you are attempting to retreive in line 15\n")
cat("1. Yes\n")
cat("2. No\n")
specify_rda <- as.integer(trimws(readline("Enter your choice (1 or 2): ")))
if (specify_rda == 1) {
data_new <- get(rda_name)
assign('data_new', data_new, envir=.GlobalEnv)
cat("Please note Rda file name in line 15, then continue running line 61 function below")
}
else if (specify_rda == 2) {
cat("Script will proceed!")
}
}
}
## USER - RUN THIS LINE TO LOAD FILE:
load_file(file_path)
# The script is designed such that each "section" of the script CREATES and then RUNS "mini-functions" that address parts of the ANOVA process
## OBSOLETE: data_new <- as_tibble(df.hourly)
# USER Input: Step 1
# Please Replace
DV <- "FoodIn.cum.kcal"
Group <- "Group"
Time <- "Time"
Subject <- "Animal"
Dependent_variable <- "Food Intake (kcal)" ## FOR GRAPHING PURPOSES, please add units in parentheses
data_new <- data_new %>%
rename(
DV = !!sym(DV),
Group = !!sym(Group),
Time = !!sym(Time),
Subject = !!sym(Subject)
)
#DV <- maybe use rlang::expr()
## USER - Running this line will package the entire script into one function. If you choose to run this, scroll to line 472 once complete to run the function
# You instead have the option of manually "creating" and running each function for assumptions, ANOVA, post hoc, and visualization
anova_general <- function(data_new) {
## This is something to consider!! Take NOTE: THE FACTOR CONVERSION IS EXTREMELY SPECIFIC
# Step 2 - Assumptions:
# Assumption: Outlier Check
# Below Function tests for outliers.
# If outliers undetected, script proceeds
# If outliers detected, script presents outliers, and user is given option of eliminating or keeping the outlier in their analysis
option_outliers <- function(data_new) {
outliers <- data_new %>%
identify_outliers(DV)
# Identifies outliers
if (nrow(outliers) > 0) {
# Tests if outliers were produced by seeing if the outliers function produces a size > 0
cat("Outliers detected:\n")
print(outliers)
# Shows outliers
cat("\nOptions:\n")
cat("1. Eliminate detected outliers\n")
cat("2. Keep detected outliers\n")
option <- as.integer(trimws(readline("Enter your choice (1 or 2): ")))
# Offer options for treatment
if (option == 1) {
data_no_outlier <- anti_join(data_new, outliers)
View(data_no_outlier)
assign('data_new', data_no_outlier, envir=.GlobalEnv)
}
# Recreate dataframe without outlier index. It will continue to be called data_new
else if (option == 2) {
message("Script will proceed!")
return(NULL)
# Return original data, keeping outlier in the data without elimination
}
}
else {
message("No outliers, script will proceed!")
return(NULL)
# No outlier pathway
}
}
option_outliers(data_new)
# Assumption: Normality
# Tests normality through 3 possible ways, with descriptions and information in "cat" message presentations
check_normals <- function(data_new) {
cat("\nPlease Select:\n")
cat("1. qqplot, meant for sample size>50 (prone to unreadable visualization if data is too large)\n")
cat("2. Shapiro Test, prone to over-sensitivity in minor deviations for large data\n")
cat("3. Histogram, for subjective self-reference, if data is too large for above options")
choose <- as.integer(trimws(readline("Enter your choice (1, 2, or 3): ")))
if (choose == 1) { # qqplot choice
# Specify faceting
cat("\nPlease Select how you would like your data to be faceted in Time:\n")
cat("1. Facet by unique time points in dataframe (please note too many unique points will create an unreadable plot)\n")
cat("2. If time points are in hours, facet by days. Please note both options may take some time to generate the plot\n")
qqchoose <- as.integer(trimws(readline("Enter your choice (1 or 2): ")))
if (qqchoose == 1) {## qqplot choice 1
p <- ggqqplot(data_new, "DV", ggtheme = theme_bw()) +
facet_grid(Group~Time)
print(p)
cat("\nPlease see plot printed. Are you satisfied with Normality?:\n")
cat("1. Yes\n")
cat("2. No\n")
normals <- as.integer(trimws(readline("Enter your choice (1 or 2): ")))
if (normals == 1) {
message("Normality Assumption Satisfied")
}
if (normals == 2) {
message("The 2-way Mixed ANOVA is fairly robust even under slight normality violations. Depending on the perceived normality from the qqplot, you also have the option of continuing with this script. Otherwise, alternate statistical analysis may need to be considered")
}
}
if (qqchoose == 2) { ## qqplot choice 2
## Conversion to Days
data_qqtemp <- data_new
data_qqtemp$days <- data_new$Time/24
data_qqtemp$days <- floor(data_qqtemp$days)
p1 <- ggqqplot(data_qqtemp, "DV", ggtheme = theme_bw()) +
facet_grid(Group~days)
print(p1)
cat("\nPlease see plot printed. Are you satisfied with Normality?:\n")
cat("1. Yes\n")
cat("2. No\n")
normalsqq <- as.integer(trimws(readline("Enter your choice (1 or 2): ")))
if (normalsqq == 1) {
message("Normality Assumption Satisfied")
}
if (normalsqq == 2) {
message("The 2-way Mixed ANOVA is fairly robust even under slight normality violations. Depending on the perceived normality from the qqplot, you also have the option of continuing with this script. Otherwise, alternate statistical analysis may need to be considered")
}
}
}
if (choose == 2) { ## shapiro test choice
c <- shapiro.test(data_new$DV)
print(c)
cat("\nPlease see p-value printed. A p-value under 0.05 according to the Shapiro test, is statistically significant and deviates from normality. Please keep in mind this test is extremely sensitive to deviation. Are you satisfied with Normality?:\n")
cat("1. Yes\n")
cat("2. No\n")
normals2 <- as.integer(trimws(readline("Enter your choice (1 or 2): ")))
if (normals2 == 1) {
message("Normality Assumption Satisfied")
}
if (normals2 == 2) {
message("The 2-way Mixed ANOVA is fairly robust even under slight normality violations. The shapiro test is extremely sensitive to deviations, especially for larger data sets. You also have the option of continuing with this script. Otherwise, alternate statistical analysis may need to be considered")
}
}
if (choose == 3) { ## basic histogram choice
hist(data_new$DV, xlab = "Dependent Variable", main="Histogram of Variable Distribution")
cat("\nPlease see the histogram modelling your data. Are you satisfied with Normality?:\n")
cat("1. Yes\n")
cat("2. No\n")
normals2 <- as.integer(trimws(readline("Enter your choice (1 or 2): ")))
if (normals2 == 1) {
message("Normality Assumption Satisfied")
}
if (normals2 == 2) {
message("The 2-way Mixed ANOVA is fairly robust even under slight normality violations. Depending on the perceived normality from the plot, you also have the option of continuing with this script. Otherwise, alternate statistical analysis may need to be considered")
}
}
}
check_normals(data_new) # Run above function
# Conversion to Factor (do only post-outlier and post-normality)
data_new <- data_new %>%
convert_as_factor(Subject, Time, Group)
# Assumption: Homogeneity of Variance
# The Levene Test is used for checking this
check_variance <- function(data_new) {
levene <- data_new %>%
group_by(Time) %>%
levene_test(DV ~ Group)
detected_rows <- levene %>%
filter(p < 0.05)
# A value under 0.05 for p is usually an indicator of violation of this assumption
if (nrow(detected_rows) > 0) {
message("Levene Test Violation Detected:")
print(detected_rows)
message("This data does not pass the Levene Homogeneity of Variance Test. Alternate statistical analysis may need to be considered. You also have the option of proceeding.")
} else {
message("Levene Test Assumption Satisfied")
}
}
check_variance(data_new)
# Assumption: Homogeneity of Covariance
# The Box's M Test is used here. Due to the extreme sensitivity of this test, the qualification to pass is p>0.001. Even if data does not meet this low threshold, however, many will continue the ANOVA analysis
check_covariance <- function(data_new){
cov <- box_m(data_new[, "DV", drop = FALSE], data_new$Group)
print(cov)
if (cov$p.value < 0.001) {
# A value under 0.001 for p is usually an indicator of violation of this assumption
message("This data does not pass the Box's M Homogeneity of Covariance Test. Alternate statistical analysis may need to be considered. Often the violation may just need to be noted, but the Mixed ANOVA test can be run anyway. Additionally, the Greenhouse-Geisser correction will be later applied in this program, which can often help account slightly for this violation.")
}
else {
message("Box's M Test Assumption Satisfied")
}
}
check_covariance(data_new)
# Now, ANOVA test is run and checks for any effects. It also applies corrections (specified below)
run_anova <- function(data_new) {
## Applying Greenhouse-Geisser sphericity correction and Mauchly Test for Sphericity
res.aov <- anova_test(
data = data_new, dv = DV, wid = Subject,
between = Group, within = Time
)
message("Mauchly Test internally applied in res.aov")
message("get_anova_table: the Greenhouse-Geisser sphericity correction is automatically applied to factors violating the sphericity assumption.")
aov <- get_anova_table(res.aov)
print(aov)
if (aov[1,6] == '*') {
message("Group Main Effect Detected")
}
if (aov[2,6] == '*') {
message("Time Main Effect Detected")
}
if(aov[3,6] == '*') {
message("Interaction Between Group and Time Detected")
}
assign('aov', aov, envir=.GlobalEnv)
return(aov)
# returns an ANOVA dataframe with key details. This can be retrieved later as "aov".
}
run_anova(data_new)
# Post Hoc, seeing Group effects at every time point
treat_maineffect<- function(data_new) {
print("Now running post-hoc tests for signifcance by time point. Depending on data size, this may take slightly longer than expected")
one_way_test <- data_new %>%
group_by(Time) %>%
anova_test(dv = DV, wid = Subject, between = Group) %>%
get_anova_table() %>%
adjust_pvalue(method = "bonferroni") %>%
as_tibble()
View(one_way_test)
assign('one_way_test', one_way_test, envir=.GlobalEnv)
colnames(one_way_test)[7] <- "Significance"
# Find Significance from dataframe. Below line builds a dataframe based only on significance.
# Then a row count is done to check if significance is present
detected_sig <- subset(one_way_test, grepl("\\*", Significance))
print(one_way_test)
if (nrow(detected_sig) > 0) {
message("Significance in Group main effect was detected at certain time points")
message("Please see complete table above to record time points where signfificance was seen")
}
else {
message("No Significance was detected on the dependent variable between groups at any time point")
}
}
treat_maineffect(data_new)
## Pairwise Data frame Construction for Plotting. This function accomplishes the same thing as the post-hoc test above, but does so for the prupose of visualization. This table is also retreivable
make_pairwise <- function(data_new) {
Max_mass <- data_new %>%
group_by(Time) %>%
summarize(max(DV))
# The above code generates a data frame for maximum DV value to be added to the pairwise function.
# This is so that the mass function can be carried to significant dataframes for plotting of significance indicators later
pw <<- data_new %>%
group_by(Time) %>%
pairwise_t_test(DV ~ Group, p.adjust.method = "bonferroni") %>%
add_column(yplacement = Max_mass$'max(DV)')
# The above last line is for adding a coordinate for assisting in placement of significance labels in the pw dataframe
View(pw)
pw <- print(pw)
return(pw)
}
make_pairwise(data_new)
## AT this point, please create and execute either final_box1 (unaltered graph) function, or final_box2 (smoothed graph with full statistical notations) function.
### PLEASE RUN THIS FUNCTION IF YOU WOULD AN UNALTERED GRAPH (THIS MAY TAKE SLIGHTLY LONG)
## Use this pairwise dataframe to generate final boxplot
final_box1 <- function(pw) {
singlesig <- pw %>%
filter(p.signif == "*") %>%
select(Time, p, yplacement)
## Above and below code blocks are for setting the y-value placement of the asterisk (signficance indicators)
doublesig <- pw %>%
filter(p.signif == "**") %>%
select(Time, p, yplacement)
cat("\nNow making visualization. Please choose interval size for Time (x-axis):\n")
int_chosen <- as.integer(trimws(readline("Enter your choice as a numeric value based on how far apart you would like your time variable spaced: ")))
## Choosing interval size to avoid unreadable x-axis
data_new$Time <- as.numeric(as.character(data_new$Time))
## Conversion to numeric for processing in labeling adjustment
# Choosing F-statistic presentation
cat("\nPlease choose the precise F-statistic type you would like on your plot:\n")
cat("1. Group Main Effect\n")
cat("2. Time Main Effect\n")
cat("3. Group-Time Interaction Effect\n")
effect_type <- as.integer(trimws(readline("Enter your choice as 1, 2, or 3: ")))
if (effect_type == 1) {
f_chosen = 1
Effect_F <- aov$'F'[1]
Effect_F
}
if (effect_type == 2) {
f_chosen = 2
Effect_F <- aov$'F'[2]
Effect_F
}
if (effect_type == 3) {
f_chosen = 3
Effect_F <- aov$'F'[3]
Effect_F
}
fin_bxp <- ggboxplot(
data_new, x = "Time", y = "DV",
color = "Group", palette = "jco") +
geom_text(x = max(as.numeric(data_new$Time)), y = min(data_new$DV),
label = sprintf("F(%.2f) = %.3f", aov$DFn[f_chosen], Effect_F),
# The printed value on the plot represents the DF and F statistic value.
# For a breakdown on p-values by time point, please reference the "pw" DF
hjust = 1, vjust = 1, color = "red") +
geom_text(data = singlesig, aes(x=Time, y=yplacement, label = "*"), size = 5, hjust = 0.5, vjust = -0.8, color = "red") +
geom_text(data = doublesig, aes(x=Time, y=yplacement, label = "**"), size = 5, hjust = 0.5, vjust = -0.8, color = "red") +
labs(x = "Time (hours)", y = Dependent_variable) +
scale_x_discrete(breaks = seq(0, max(data_new$Time), by = int_chosen))+
theme_minimal()
fin_bxp <- fin_bxp + expand_limits(y = 0)
fin_bxp
}
final_box1(pw)
###################################
summary_data <- data_new %>%
group_by(Group, Time) %>%
summarise(mean_y = mean(DV),
se_y = sd(DV) / sqrt(n()))
singlesig <- pw %>%
filter(p.signif == "*") %>%
select(Time, p, yplacement)
singlesig$Time <- as.numeric(as.character(singlesig$Time))
## Above and below code blocks are for setting the y-value placement of the asterisk (signficance indicators)
doublesig <- pw %>%
filter(p.signif == "**") %>%
select(Time, p, yplacement)
doublesig$Time <- as.numeric(as.character(doublesig$Time))
###################################
## PLEASE RUN THIS FUNCTION IF YOU WOULD LIKE A SMOOTHED GRAPH (MAY TAKE SLIGHTLY LONG)
final_box2 <- function(pw) {
singlesig <- pw %>%
filter(p.signif == "*") %>%
select(Time, p, yplacement)
singlesig$Time <- as.numeric(as.character(singlesig$Time))
## Above and below code blocks are for setting the y-value placement of the asterisk (signficance indicators)
doublesig <- pw %>%
filter(p.signif == "**") %>%
select(Time, p, yplacement)
doublesig$Time <- as.numeric(as.character(doublesig$Time))
cat("\nNow making visualization. Please choose interval size for Time (x-axis):\n")
int_chosen <- as.integer(trimws(readline("Enter your choice as a numeric value based on how far apart you would like your time variable spaced: ")))
## Choosing interval size to avoid unreadable x-axis
summary_data$Time <- as.numeric(as.character(summary_data$Time))
data_new$Time <- as.numeric(as.character(data_new$Time))
## Conversion to numeric for processing in labeling adjustment
# Choosing F-statistic presentation
cat("\nPlease choose the precise F-statistic type you would like on your plot:\n")
cat("1. Group Main Effect\n")
cat("2. Time Main Effect\n")
cat("3. Group-Time Interaction Effect\n")
effect_type <- as.integer(trimws(readline("Enter your choice as 1, 2, or 3: ")))
if (effect_type == 1) {
f_chosen = 1
Effect_F <- aov$'F'[1]
Effect_F
}
if (effect_type == 2) {
f_chosen = 2
Effect_F <- aov$'F'[2]
Effect_F
}
if (effect_type == 3) {
f_chosen = 3
Effect_F <- aov$'F'[3]
Effect_F
}
fin_bxp2 <-
ggplot(summary_data, aes(x = Time, y = mean_y, color = Group)) +
stat_summary(data = summary_data, fun = "mean", geom = "line", aes(group = Group, x=Time, y=mean_y)) +
stat_summary(data = summary_data, aes(x=Time, y = mean_y, group = Group), fun.data = mean_se, geom = "ribbon", alpha = 0.2) +
geom_ribbon(data = summary_data, aes(ymin = mean_y - se_y, ymax = mean_y + se_y, fill = Group), alpha = 0.2, show.legend = FALSE) +
geom_text(x = max(as.numeric(data_new$Time)), y = min(data_new$DV),
label = sprintf("F(%.2f) = %.3f", aov$DFn[f_chosen], Effect_F),
# The printed value on the plot represents the DF and F statistic value.
# For a breakdown on p-values by time point, please reference the "pw" DF
hjust = 1, vjust = 1, color = "red") +
geom_text(data = singlesig, aes(x=Time, y=yplacement, label = "*"), size = 5, hjust = 0.5, vjust = -0.8, color = "red") +
geom_text(data = doublesig, aes(x=Time, y=yplacement, label = "**"), size = 5, hjust = 0.5, vjust = -0.8, color = "red") +
labs(x = "Time (hours)", y = Dependent_variable) +
theme_minimal() +
scale_x_continuous(breaks = seq(0, max(data_new$Time), by = int_chosen))
fin_bxp2 <- fin_bxp2 + expand_limits(y = 0)
fin_bxp2
}
final_box2(pw)
}
# Below line is for running the "mega" function which encapsulates each section into one. See above instructions at the top of the script.
anova_general(data_new)
save_tables <- function(aov, one_way_test, pw) {
cat("\nWould you like to save your final ANOVA and post-hoc tables to your current working directory? These will be saved as CSV files\n")
cat("1. Yes\n")
cat("2. No\n")
Saving <- as.integer(trimws(readline("Enter your choice as 1 or 2: ")))
if (Saving == 1) {
write.csv(aov, "ANOVA_table.csv")
write.csv(one_way_test, "One_way_test_posthoc.csv")
write.csv(pw, "Pairwise_test_posthoc.csv")
}
if (Saving == 2) {
cat("Thank you for using this program. Please manually save your final visualization.")
}
}
save_tables(aov, one_way_test, pw)