forked from cis-ds/pipeline-example
-
Notifications
You must be signed in to change notification settings - Fork 0
/
01_filter-reorder-plot.R
34 lines (27 loc) · 990 Bytes
/
01_filter-reorder-plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
library(tidyverse)
library(forcats)
library(feather)
# FYI: there is one call to plyr::revalue() so plyr should be installed but not
# loaded
# import the raw tab-separated data
lotr_dat <- read_tsv(file = "data/lotr_raw.tsv")
# clean the data
## convert Film to a factor
##   - NOTE(review): factor() leaves the levels in sorted order, which is not
##     necessarily story order; pass `levels =` explicitly if story order is
##     required (the exact Film strings are not visible here -- confirm)
## revalue Race
##   - no one knows that the Ainur are the wizards: Ainur --> Wizard
##   - let's be consistent: Men --> Man
## drop least frequent Races (Gollum, Ent, Dead, Nazgul)
## reorder Race levels by total Words spoken
## arrange the rows on Race, Film, Words
## drop factor levels that are no longer used
lotr_dat <- lotr_dat %>%
  mutate(
    Film = factor(Film),
    Race = plyr::revalue(Race, c(Ainur = "Wizard", Men = "Man"))
  ) %>%
  filter(!(Race %in% c("Gollum", "Ent", "Dead", "Nazgul"))) %>%
  # the summary-function argument of fct_reorder() is `.fun`; a bare `fun = sum`
  # does not match it and falls into `...`, so sum would never be applied
  mutate(Race = fct_reorder(Race, Words, .fun = sum)) %>%
  arrange(Race, Film, Words) %>%
  droplevels()
# bar chart of Race, with each row weighted by its Words count
p <- ggplot(data = lotr_dat, mapping = aes(x = Race, weight = Words)) +
  geom_bar()

# save the chart as a PNG
ggsave(filename = "graphics/barchart_total-words-by-race.png", plot = p)
# persist the cleaned data frame in feather format
write_feather(x = lotr_dat, path = "data/lotr_clean.feather")