-
Notifications
You must be signed in to change notification settings - Fork 0
/
manipulate.Rmd
159 lines (127 loc) · 2.74 KB
/
manipulate.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
---
title: "Manipulate"
author: "Diana LaScala-Gruenewald"
date: "6/28/2021"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE for every chunk in this document,
# so each chunk's source code is shown alongside its output
knitr::opts_chunk$set(echo = TRUE)
```
## Read online table
### Download table ('*.csv')
```{r}
# Set variables: URL of the remote CSV table and the local folder to store it
csv_url <- "https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_AC.csv"
dir_data <- "data"

# Create derived variables: local path is <dir_data>/<file name from the URL>
csv <- file.path(dir_data, basename(csv_url))

# Create the directory only when it is missing; calling dir.create() on an
# existing directory emits a warning on every re-knit
if (!dir.exists(dir_data)) {
  dir.create(dir_data, recursive = TRUE)
}

# Download data (done on every run, so the local copy is always overwritten)
download.file(csv_url, csv)
```
### Read table `read.csv()`
```{r}
# Load the downloaded CSV into a data frame; the first line supplies the
# column names
d <- read.csv(file = csv)

# Preview of the first rows (left disabled)
# head(d)
```
The second line contains the units; we want to skip this.
```{r}
# Read data, skipping the first two lines (column names and units), so the
# remaining lines are parsed without a header row
d <- read.csv(csv, skip = 2, header = FALSE)

# Restore column names from the file's header line. nrows = 1 is enough to
# obtain the names and avoids parsing the whole file a second time.
names(d) <- names(read.csv(csv, nrows = 1))

# head(d)
```
We can render these data frames in a nicer way.
### Show table `DT::datatable()`
```{r}
# Render the data frame as an HTML table widget
DT::datatable(data = d)
```
## Wrangle data
### Manipulate with `dplyr`
```{r}
library(DT)
library(dplyr)

d <- d %>%
  # Coerce to tibble. as_tibble() is the dedicated coercion for an existing
  # data frame; tibble() is the constructor for building one from columns.
  as_tibble() %>%
  # Change time from string to Date using its first 10 characters
  # (presumably a YYYY-MM-DD prefix of an ISO timestamp; verify against data)
  mutate(
    time = as.Date(substr(time, 1, 10))
  ) %>%
  # Keep time plus the columns that start with total_fisheries_revenue
  select(
    time,
    starts_with("total_fisheries_revenue")
  ) %>%
  # Keep rows dated 1981-01-01 or later (fixed cutoff, not a rolling window)
  filter(
    time >= as.Date("1981-01-01")
  )

datatable(d)
```
## Tidy with `tidyr`
```{r}
library(tidyr)

# Reshape from wide to long: every column except time is stacked into a pair
# of columns, name (the former column name) and value (its cell contents)
d <- pivot_longer(
  d,
  cols = -time,
  names_to = "name",
  values_to = "value"
)

datatable(d)
```
## Summarize with `dplyr`
```{r}
library(stringr)

# Get region column
d <- d %>%
  # Replace total_fisheries_revenue_ in the name column with '' so that only
  # the remainder of the column name is kept as the region
  mutate(
    region = str_replace(name, "total_fisheries_revenue_", "")
  ) %>%
  # Select time, region, and value columns (name is no longer needed)
  select(
    time,
    region,
    value
  )

# Find average revenue by region
d_sum <- d %>%
  # Group by region
  group_by(region) %>%
  # Take the mean. na.rm = TRUE keeps a single missing observation from
  # turning a region's whole average into NA.
  summarize(
    avg_revenue = mean(value, na.rm = TRUE)
  )

# Show data table with the avg_revenue column formatted as currency
datatable(d_sum) %>%
  formatCurrency("avg_revenue")
```
## Apply functions with `purrr` on a `nest`'ed `tibble`
```{r}
library(purrr)

# Create nest: one row per region, with that region's (time, value) pairs
# packed into a list-column named data
n <- d %>%
  # Create grouping variable (region)
  group_by(region) %>%
  # Create nest of paired time, value points for each region
  nest(
    data = c(time, value)
  ) %>%
  # Drop the grouping so later verbs operate on the whole table
  ungroup()

n <- n %>%
  # Add columns lm (a linear model value ~ time per region) and trend (the
  # slope of that model)
  mutate(
    # The parameter is named region_data so it does not shadow the global d
    lm = map(data, function(region_data) {
      lm(value ~ time, data = region_data)
    }),
    # Read the slope from coef(model): an aliased (inestimable) slope is
    # reported as NA there, whereas the summary() coefficient table omits
    # the row and indexing it by name would fail.
    # NOTE(review): if time is a Date, the slope is in value units per day.
    trend = map_dbl(lm, function(model) {
      coef(model)[["time"]]
    })
  )

# Select just the region and the trend (slope) and display
n %>%
  select(region, trend) %>%
  datatable()
```