-
Notifications
You must be signed in to change notification settings - Fork 1
/
Full_Analysis.R
112 lines (90 loc) · 4.52 KB
/
Full_Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#Load packages
library(dplyr)
library(ggplot2)
#Load GCAM data
soilC <- read.csv(file = 'Data/GCAM_soilC.csv')
timescales <- read.csv(file = 'Data/soil_timescales.csv')
glus <- read.csv(file = 'Data/GLU_codes.csv')
regions <- read.csv('Data/GCAM_regions.csv')
#Load experimental data
PostKwon <- read.csv(file= 'Data/Experimental Data.csv', na.strings = c("", "NA"))
Wei <- read.csv(file= 'Data/Wei et al Data.csv', na.strings = c("", "NA"))
#Join GLU codes with soilC data
soilC %>%
mutate(GLU_code = GLU) %>%
right_join(glus, by='GLU_code') %>%
right_join(regions, by='GCAM_region_ID') %>%
right_join(timescales, by='region')-> soilC_regions
#Simplify the data to just the stuff we'll need to compare with experimental data
soilC_regions %>%
select(Land_Type, soil_c, GLU_code, soilTimeScale, GCAM_region_ID) -> simple_soilC_regions
#Creating the Post & Kwon comparison data
PostKwon %>%
select(Initial_Land_Use, Final_Land_Use, GLU_code, GCAM_region_ID, Time, Exp_Rate) %>%
na.omit() %>%
mutate(Land_Type = Initial_Land_Use) %>%
right_join(simple_soilC_regions, by = c('GLU_code', 'Land_Type', 'GCAM_region_ID')) %>%
rename(initial_soil_c = soil_c) %>%
select(-Land_Type) %>%
mutate(Land_Type = Final_Land_Use) %>%
right_join(simple_soilC_regions, by = c('GLU_code', 'Land_Type', 'GCAM_region_ID')) %>%
rename(final_soil_c = soil_c) %>%
select(-Land_Type, -soilTimeScale.x) %>%
rename(soilTimeScale = soilTimeScale.y) %>%
na.omit() %>%
mutate(GCAM_Rate = (final_soil_c - initial_soil_c)/soilTimeScale, Rate_Difference = Exp_Rate - GCAM_Rate,
Exp_k = -log(abs(Exp_Rate)*Time +1)/Time,
GCAM_k = -log(final_soil_c/initial_soil_c)/soilTimeScale,
source = 'Post & Kwon'
) %>%
#This next line corrects the sign of Exp_k--we had to take the absolute value to avoid NaNs, so this accounts for that
mutate(Exp_k = ifelse(sign(Exp_k) == sign(Exp_Rate), Exp_k*(-1), Exp_k)) -> PostKwon_Comparison
#Creating the Wei et al comparison data
Wei %>%
select(Initial_Land_Use, Final_Land_Use, GLU_code, GCAM_region_ID, OC_decrease, Time) %>%
na.omit() %>%
mutate(Land_Type = Initial_Land_Use) %>%
right_join(simple_soilC_regions, by = c('GLU_code', 'Land_Type', 'GCAM_region_ID')) %>%
rename(initial_soil_c = soil_c) %>%
select(-Land_Type) %>%
mutate(Land_Type = Final_Land_Use) %>%
right_join(simple_soilC_regions, by = c('GLU_code', 'Land_Type', 'GCAM_region_ID')) %>%
rename(final_soil_c = soil_c) %>%
select(-Land_Type, -soilTimeScale.x) %>%
rename(soilTimeScale = soilTimeScale.y) %>%
na.omit() %>%
mutate(GCAM_Rate = (final_soil_c - initial_soil_c)/soilTimeScale,
GCAM_k = -log(final_soil_c/initial_soil_c)/soilTimeScale,
Exp_k = -log(1/((abs(OC_decrease)/100) +1))/Time,
source = 'Wei et al'
) %>%
#This next line corrects the sign of Exp_k--we had to take the absolute value to avoid NaNs, so this accounts for that
mutate(Exp_k = ifelse(sign(Exp_k) == sign(OC_decrease), Exp_k, Exp_k*(-1))) -> Wei_Comparison
#Now, we bind the two rows together. Because the Wei et al data doesn't have any raw rates, we won't include that data from Post & Kwon either
Full_Comparison <- bind_rows(
select(PostKwon_Comparison, -Exp_Rate, -Rate_Difference),
select(Wei_Comparison, -OC_decrease)
)
#Plot the two k vals against each other with a 1:1 line as well
ggplot(data = Full_Comparison, aes(x = Exp_k, y = GCAM_k)) +
geom_point(aes(shape = Final_Land_Use, color = Initial_Land_Use), size = 2) +
scale_shape_manual(values = c(4, 8, 16, 17)) +
scale_shape(solid = TRUE) +
geom_abline() +
xlab('Experimental k (1/y)') + ylab('GCAM Derived k (1/y)') +
theme_light() +
xlim(-.1, .4) + ylim(-.1, .4) +
labs(title = 'SOC k comparison during land use transition', color = 'Initial Land Use', shape = 'Final Land Use')
#Plot overlapping k histograms for the different k sources
ggplot() +
geom_histogram(aes(x = Full_Comparison$Exp_k, fill = Full_Comparison$source), alpha = 0.5, bins = 75) +
geom_histogram(aes(x = Full_Comparison$GCAM_k, fill = 'GCAM'), alpha = 0.5, bins = 75) +
xlab(expression(k~(y^-1))) + ylab('Count') +
scale_fill_manual(name = "Data Source",
values = c('GCAM'='#e3962b', 'Wei et al' = '#45912c', 'Post & Kwon' = '#3584B0')) +
scale_y_continuous(name = 'Count')
theme_light()
ggsave('Full_k_hist.jpeg', path = 'Graphs')
#T_Test
t.test(Full_Comparison$Exp_k, Full_Comparison$GCAM_k, alternative = 'two.sided') ->Full_T_test
#According to this, there is not a significant difference in the average of the means