-
Notifications
You must be signed in to change notification settings - Fork 2
/
05_analysis_2.R
146 lines (103 loc) · 3.8 KB
/
05_analysis_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
library(feather)
library(tidyverse)
library(broom)
#source("02_translation.R")
# Description of this R script
######################################################################################
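# Purpose: Detect changes in word usage before and after a social movement
# using a statistical learning approach (per-word binomial regression on date)
# Input file: data/df (feather format)
# Output: data/model1989_top6, data/model1986_top6 (feather); graph/top1989.png, graph/top1986.png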
######################################################################################
# Load the term-level table (feather format) that the rest of the script uses.
df <- read_feather("data/df")

# Restrict to the twelve months starting 4 June 1989 and split `year_month`
# into separate Year / Month columns.
df1989 <- df %>%
  filter(
    Datetime >= as.Date("1989-06-04"),
    Datetime <= as.Date("1990-06-04")
  ) %>%
  separate(year_month, into = c("Year", "Month"), sep = "-") %>%
  # The tidied model output below carries its own `term` column, so the token
  # is copied into `word` to keep the two apart.
  mutate(word = term)

# Per-day count of every word, the total count for that day, and the word's
# share of the day's total.
data1989 <- df1989 %>%
  count(Datetime, word) %>%
  ungroup() %>%
  group_by(Datetime) %>%
  mutate(day_total = sum(n)) %>%
  mutate(percent = n / day_total) %>%
  ungroup()

# For each word seen more than 50 times in the window, fit a binomial GLM of
# (occurrences, non-occurrences) on the date. Keep only the Datetime
# coefficient rows with p <= 0.005, ordered by absolute effect size.
model1989 <- data1989 %>%
  group_by(word) %>%
  filter(sum(n) > 50) %>%
  do(tidy(glm(
    cbind(n, day_total - n) ~ Datetime,
    family = "binomial",
    data = .
  ))) %>%
  ungroup() %>%
  filter(term == "Datetime", p.value <= 0.005) %>%
  arrange(desc(abs(estimate)))
# Scatter of the per-word Datetime coefficient against its adjusted p-value
# (log-scaled y axis) for June 1989 - June 1990.
# The title states the threshold actually applied when `model1989` was built
# (p.value <= 0.005); it previously read "p<0.0005", which did not match.
# NOTE(review): p.adjust() runs only over the terms that already passed the
# raw p-value filter, so the correction is computed on a reduced set of
# tests — confirm this is intended.
model1989 %>%
  mutate(adjusted.p.value = p.adjust(p.value)) %>%
  ggplot(aes(estimate, adjusted.p.value)) +
  theme_bw() +
  geom_point() +
  scale_y_log10() +
  labs(
    x = "Estimated change over time",
    y = "Adjusted p-value",
    title = "p-value vs. estimated change of the words during June 1989 - June 1990 (p<=0.005)"
  )
# Keep the six terms with the largest absolute Datetime coefficient (ties may
# yield more than six rows) and attach an English label for plotting.
# NOTE(review): translateCnWords() is not defined in this file; presumably it
# comes from 02_translation.R, whose source() call at the top is commented
# out — confirm it is available before running.
model1989_top6 <- model1989 %>%
  top_n(6, abs(estimate)) %>%
  mutate(En = translateCnWords(word))
write_feather(model1989_top6, "data/model1989_top6")

# Daily share of each selected term with a loess smoother, one panel per term.
top6_plot1989 <- model1989_top6 %>%
  inner_join(data1989, by = "word") %>%
  ggplot(aes(Datetime, percent)) +
  geom_point() +
  geom_smooth(method = "loess") +
  facet_wrap(~ En) +
  labs(title = "Word change during 1989-1990", y = "Frequency", x = "Date") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
top6_plot1989

# Save with an explicit plot and size. A png("upload.png", 500 x 400) device
# used to be opened here and never closed; its only effect was to set the size
# ggsave() inherited, and the open device then captured later plots.
# 500 x 400 px at png()'s default 72 ppi gives the same dimensions in inches.
ggsave(
  "graph/top1989.png",
  plot = top6_plot1989,
  width = 500 / 72,
  height = 400 / 72
)
######################################################################################
# Restrict to the twelve months starting 27 December 1986 and split
# `year_month` into separate Year / Month columns.
df1986 <- df %>%
  filter(
    Datetime >= as.Date("1986-12-27"),
    Datetime <= as.Date("1987-12-27")
  ) %>%
  separate(year_month, into = c("Year", "Month"), sep = "-") %>%
  # The tidied model output below carries its own `term` column, so the token
  # is copied into `word` to keep the two apart.
  mutate(word = term)

# Per-day count of every word, the total count for that day, and the word's
# share of the day's total.
data1986 <- df1986 %>%
  count(Datetime, word) %>%
  ungroup() %>%
  group_by(Datetime) %>%
  mutate(day_total = sum(n)) %>%
  mutate(percent = n / day_total) %>%
  ungroup()

# For each word seen more than 50 times in the window, fit a binomial GLM of
# (occurrences, non-occurrences) on the date. Keep only the Datetime
# coefficient rows with p <= 0.005, ordered by absolute effect size.
model1986 <- data1986 %>%
  group_by(word) %>%
  filter(sum(n) > 50) %>%
  do(tidy(glm(
    cbind(n, day_total - n) ~ Datetime,
    family = "binomial",
    data = .
  ))) %>%
  ungroup() %>%
  filter(term == "Datetime", p.value <= 0.005) %>%
  arrange(desc(abs(estimate)))
# Scatter of the per-word Datetime coefficient against its adjusted p-value
# (log-scaled y axis) for December 1986 - December 1987.
# The title states the threshold actually applied when `model1986` was built
# (p.value <= 0.005); it previously read "p<0.0005", which did not match.
# NOTE(review): p.adjust() runs only over the terms that already passed the
# raw p-value filter, so the correction is computed on a reduced set of
# tests — confirm this is intended.
model1986 %>%
  mutate(adjusted.p.value = p.adjust(p.value)) %>%
  ggplot(aes(estimate, adjusted.p.value)) +
  theme_bw() +
  geom_point() +
  scale_y_log10() +
  labs(
    x = "Estimated change over time",
    y = "Adjusted p-value",
    title = "p-value vs. estimated change of the words during December 1986 - December 1987 (p<=0.005)"
  )
# Keep the six terms with the largest absolute Datetime coefficient (ties may
# yield more than six rows) and attach an English label for plotting.
# NOTE(review): translateCnWords() is not defined in this file; presumably it
# comes from 02_translation.R, whose source() call at the top is commented
# out — confirm it is available before running.
model1986_top6 <- model1986 %>%
  top_n(6, abs(estimate)) %>%
  mutate(En = translateCnWords(word))
write_feather(model1986_top6, "data/model1986_top6")

# Daily share of each selected term with a loess smoother, one panel per term.
top6_plot1986 <- model1986_top6 %>%
  inner_join(data1986, by = "word") %>%
  ggplot(aes(Datetime, percent)) +
  geom_point() +
  geom_smooth(method = "loess") +
  facet_wrap(~ En) +
  labs(title = "Word change during 1986-1987", y = "Frequency", x = "Date") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
top6_plot1986

# Save with an explicit plot and size. A png("upload.png", 500 x 400) device
# used to be opened here and never closed; its only effect was to set the size
# ggsave() inherited, and it left a stray graphics device open.
# 500 x 400 px at png()'s default 72 ppi gives the same dimensions in inches.
ggsave(
  "graph/top1986.png",
  plot = top6_plot1986,
  width = 500 / 72,
  height = 400 / 72
)