-
Notifications
You must be signed in to change notification settings - Fork 0
/
visualize.Rmd
260 lines (208 loc) · 4.84 KB
/
visualize.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
---
title: "Visualize"
author: "Diana LaScala-Gruenewald"
date: "7/1/2021"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk options for the whole document: echo = TRUE
knitr::opts_chunk$set(echo = TRUE)
```
## Read Data
```{r}
# Load libraries
library(here)
library(readr)
library(DT)

# Define the remote source and the local path the download is cached at
url_ac <- 'https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_AC.csv'
csv_ac <- here('data/cciea_AC_url_download.csv')

# Download the data only when no local copy exists yet. The target folder is
# created first because download.file() fails when it is missing.
if (!file.exists(csv_ac)) {
  dir.create(dirname(csv_ac), recursive = TRUE, showWarnings = FALSE)
  download.file(url_ac, csv_ac)
}

# Read the data rows, skipping the first two lines of the file: line 1 holds
# the column names; line 2 is skipped as well (presumably ERDDAP's units
# row -- confirm against the downloaded file).
d_ac <- read_csv(csv_ac, col_names = FALSE, skip = 2)

# Recover the column names from line 1. n_max = 0 parses the header only,
# so the data rows are not read a second time.
names(d_ac) <- names(read_csv(csv_ac, n_max = 0))

# Show data
datatable(d_ac)
```
## Plot data statically with `ggplot2`
### Simple line plot `+ geom_line()`
```{r}
library(dplyr)
library(ggplot2)

# Coastwide subset: keep only the time and coastwide revenue columns, then
# drop the rows where the revenue is missing
d_coast <- d_ac %>%
  select(time, total_fisheries_revenue_coastwide) %>%
  filter(!is.na(total_fisheries_revenue_coastwide))

# Show the subset as a table
datatable(d_coast)
```
```{r}
# Build the plot object: time on the x axis, coastwide revenue on the y axis,
# drawn as a line
p_coast <- ggplot(
  d_coast,
  aes(x = time, y = total_fisheries_revenue_coastwide)
) +
  geom_line()

# Show plot
p_coast
```
### Trend line `+ geom_smooth()`
```{r}
# Overlay a linear-model ('lm') trend line on the coastwide line plot
p_coast + geom_smooth(method = 'lm')
```
### Distribution of values `+ geom_histogram()`
Note that `geom_histogram()` plots the frequency of values for a single variable.
```{r}
# Histogram of the coastwide revenue values, grouped into 10 bins
ggplot(d_coast, aes(x = total_fisheries_revenue_coastwide)) +
  geom_histogram(bins = 10)
```
### Series line plot `aes(color = region)`
```{r}
library(stringr)
library(tidyr)

# Reshape the regional revenue columns into long format with the columns
# time, region and value
d_rgn <- d_ac %>%
  # Keep time plus every revenue column except the coastwide total
  select(
    time,
    starts_with('total_fisheries_revenue'),
    -total_fisheries_revenue_coastwide
  ) %>%
  # Stack the revenue columns into `name` / `value` pairs
  pivot_longer(cols = -time) %>%
  # Drop missing values
  filter(!is.na(value)) %>%
  # Derive the region label: strip the shared prefix, then upper-case it
  mutate(
    region = str_to_upper(
      str_replace(name, 'total_fisheries_revenue_', '')
    )
  ) %>%
  # Keep only the columns used for plotting
  select(time, region, value)

# Plot object with one line per region, colored by region
p_rgn <- ggplot(
  d_rgn,
  aes(x = time, y = value, group = region, color = region)
) +
  geom_line()

# Show
p_rgn
```
### Update labels `+ labs()`
```{r}
# Set the title, axis labels and legend title, and store the relabelled plot
# back in p_rgn
p_rgn <- p_rgn + labs(
  title = 'Fisheries Revenue',
  x = 'Year',
  y = 'Millions $ (year 2015)',
  color = 'Region'
)

# Show
p_rgn
```
### Multiple plots with `facet_wrap()`
```{r}
# Split the regional plot into one panel per region
p_rgn + facet_wrap(vars(region))
```
### Bar plot `+ geom_col()`
Note that `geom_col()` plots both an x and y variable (e.g. revenue by region), while
`geom_bar()` plots only an x variable (e.g. the number of revenue values for each region).
```{r}
library(glue)
library(lubridate)

# Most recent year present in the regional data
yr_max <- d_rgn$time %>%
  max() %>%
  year()

# Revenue by region, restricted to that most recent year
ggplot(
  filter(d_rgn, year(time) == yr_max),
  aes(x = region, y = value, fill = region)
) +
  geom_col() +
  # The title interpolates the year that was plotted
  labs(
    title = glue('Fisheries Revenue for {yr_max}'),
    x = 'Region',
    y = 'Millions $ (year 2015)',
    fill = 'Region'
  )
```
Contrast with `geom_bar()`. Also note that `color` only changes the outline color, while
`fill` changes the color of the entire bar.
```{r}
# Bar height is the number of rows for each region; region is mapped to
# `color` here rather than `fill`
ggplot(d_rgn, aes(x = region, color = region)) +
  geom_bar()
```
### Variation of series with `+ geom_boxplot()`
```{r}
# Box plot of the revenue values within each region
ggplot(d_rgn, aes(x = region, y = value, fill = region)) +
  geom_boxplot() +
  labs(
    title = 'Fisheries Revenue Variability',
    x = 'Region',
    y = 'Millions $ (year 2015)'
  ) +
  # The x axis already names each region, so the legend is redundant
  theme(legend.position = 'none')
```
### Variation of series with `+ geom_violin()`
```{r}
# Violin plot of the revenue values within each region, stored for reuse
p_rgn_violin <- ggplot(
  d_rgn,
  aes(x = region, y = value, fill = region)
) +
  geom_violin() +
  labs(
    title = 'Fisheries Revenue Variability, Violin Plot',
    x = 'Region',
    y = 'Millions $ (year 2015)'
  ) +
  # Hide the legend
  theme(legend.position = 'none')

# Show
p_rgn_violin
```
### Change theme with `theme_classic()`
```{r}
# theme_classic() is a complete theme: adding it resets every theme element,
# including the legend.position = 'none' that p_rgn_violin was built with.
# Re-apply that setting afterwards so the redundant legend stays hidden.
p_rgn_violin +
  theme_classic() +
  theme(legend.position = 'none')
```
## Plot interactively with `plotly` or `dygraphs`
### Make ggplot interactive with `plotly::ggplotly()`
```{r}
# Pass the regional ggplot object to plotly to get an interactive version
plotly::ggplotly(p_rgn)
```
### Create interactive time series with `dygraphs::dygraph()`
This package is written more specifically for time series data. It requires wide format data.
```{r}
library(dygraphs)

# Wide-format table: a Year column followed by one value column per region
d_rgn_wide <- d_rgn %>%
  # Keep the three needed columns, renaming time to Year
  select(Year = time, region, value) %>%
  # Reduce each timestamp to its calendar year
  mutate(Year = year(Year)) %>%
  # Spread the regions into their own columns
  pivot_wider(names_from = region, values_from = value)

# Show the wide table
datatable(d_rgn_wide)
```
```{r}
# Draw the interactive time series chart and attach a range selector to it
dyRangeSelector(dygraph(d_rgn_wide))
```
## Other packages of potential interest
- [trelliscopejs](https://cran.r-project.org/web/packages/trelliscopejs/vignettes/trelliscopejs.html)