-
Notifications
You must be signed in to change notification settings - Fork 1
/
organize_old_files.R
31 lines (24 loc) · 1.15 KB
/
organize_old_files.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Combine the scraped drought and flood article texts with their
# search-result metadata, flag texts that mention an "effect" keyword, and
# export the result as a tab-separated text file.

# Packages used below: data.table (`:=`, rbindlist, setcolorder, `[` joins)
# and stringr (str_detect). Attaching them again is harmless if a calling
# script has already done so.
library(data.table)
library(stringr)

# merge texts ----
# Tag each text table with the event type it was collected for, then stack.
texts_drought <- readRDS("data/texts1.rds")
texts_drought[, type := "drought"]
texts_flood <- readRDS("data/texts21.rds")
texts_flood[, type := "flood"]
texts_both <- rbindlist(list(texts_drought, texts_flood))

# merge text metadata ----
articles_drought <- readRDS("data/2018-01-17_results_all.rds")
articles_flood <- readRDS("data/2018-02-03_results_flood.rds")
articles_both <- rbindlist(list(articles_drought, articles_flood))

# combine together ----
# X[Y, on = "url"] keeps every row of `texts_both` and looks up `name` and
# `snippet` by url. A url that occurs several times in the metadata yields
# several rows here; exact duplicates are removed by unique() further down.
merged <- articles_both[, .(url, name, snippet)][texts_both, on = "url"]
# Expose the text under the name `text` and drop the helper columns.
merged[, text := text1]
merged[, text1 := NULL]
merged[, id := NULL]
setcolorder(merged, c("type", "domain", "url", "name", "snippet", "text"))
# saveRDS(merged, "data/merged2018-02-05.rds")
# write.table(merged, "data/merged2018-02-05.txt", sep = "\t",
#             qmethod = "double", col.names = NA)

# NOTE(review): this replaces the `merged` built above with the snapshot
# stored on disk; the steps above only matter when that snapshot is
# regenerated via the commented-out saveRDS() call.
merged <- readRDS("data/merged2018-02-05.rds")
merged <- unique(merged)

# add indicator of presence of a keyword indicating an effect
# (taken from geographical-related-keywords.txt -- from original article)
merged[, effect_keyword := str_detect(text, "(affected|hit|situation|cut off|displaced|destroyed|submerged|collapsed)")]

# write.csv() ignores `sep` (with a warning), and "/t" was a typo for the tab
# escape, so the original call produced a comma-separated file. write.table()
# with qmethod = "double" and col.names = NA keeps write.csv()'s quoting and
# header layout while actually using a tab separator.
write.table(merged, "data/merged2018-02-14.txt", sep = "\t",
            qmethod = "double", col.names = NA)