-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdplyr.Rmd
78 lines (61 loc) · 2.55 KB
/
dplyr.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
---
title: "Dplyr"
author: "Raghu Sidharthan"
output: html_document
---
[RBasic1](RBasic1),
[RBasic2](RBasic2),
[RBasic3](RBasic3),
[data.table](RDTable),
[dplyr](dplyr),
[tidyr](tidyr),
[RGIS](RGIS),
[Leaflet](Leaflet)
**1. Basic dplyr functions**
```{r,echo=TRUE,warning=FALSE,message=FALSE, results='hide',eval=F}
library(dplyr)
# NOTE(review): `flights` is not created in this chunk; it is presumably the
# nycflights13::flights data set -- load it before running these examples.

# Pick rows ----
# Comma-separated conditions are combined with AND: flights on 1 January.
filter(flights, month == 1, day == 1)
# Use | for OR: flights from January or February.
filter(flights, month == 1 | month == 2)
# Select rows by position.
slice(flights, 1:10)

# Reorder rows ----
# Sort by year, then month, then day.
arrange(flights, year, month, day)
# Use desc() to sort a column in descending order.
arrange(flights, desc(arr_delay))

# Pick and rename columns ----
# Select columns by name.
select(flights, year, month, day)
# Select all columns between year and day (inclusive).
select(flights, year:day)
# Select all columns except those from year to day (inclusive).
select(flights, -(year:day))
# Named arguments rename, but select() drops every column not mentioned.
select(flights, tail_num = tailnum)
# rename() keeps all columns; the syntax is new_name = old_name.
rename(flights, tail_num = tailnum)
# Use distinct() to find the unique values of a column.
distinct(flights, tailnum)

# Add new columns with mutate() ----
mutate(flights,
  gain = arr_delay - dep_delay,
  speed = distance / air_time * 60
)
# mutate() allows you to refer to columns that you have just created:
mutate(flights,
  gain = arr_delay - dep_delay,
  gain_per_hour = gain / (air_time / 60)
)
# If you only want to keep the new variables, use transmute():
transmute(flights,
  gain = arr_delay - dep_delay,
  gain_per_hour = gain / (air_time / 60)
)

# Summarise values with summarise() ----
# Collapse the table to one row holding the mean departure delay (NAs ignored).
summarise(flights, delay = mean(dep_delay, na.rm = TRUE))

# Randomly sample rows with sample_n() and sample_frac() ----
# In dplyr >= 1.0.0 these are superseded by slice_sample(n = ) / (prop = ).
sample_n(flights, 10)       # a fixed number of rows
sample_frac(flights, 0.01)  # a fixed fraction of rows
```
**2. Grouping functions and merge**
```{r,results='hide',echo=TRUE,eval=FALSE}
# Sequentially number the rows within each SAMPN group, ordered by arrtime2.
Trip <- Trip %>%
  arrange(SAMPN, arrtime2) %>%
  group_by(SAMPN) %>%
  mutate(tripnum = row_number()) %>% # 1, 2, ... restarting in every group
  ungroup() # drop the grouping so later verbs are not silently run per SAMPN
# Merging data: add the StationNames columns to S2SData by matching
# S2SData$FromNode against StationNames$Node. left_join() keeps every row
# of S2SData, including rows without a match.
S2SData <- left_join(
  S2SData,
  StationNames,
  by = c('FromNode' = 'Node')
)
```
**3. Misc**
```{r,results='hide',echo=TRUE,eval=FALSE}
# Example of reading files in a loop and renaming a variable inside the loop
# from a string. rename_() is deprecated (dplyr >= 0.7.0); unquoting the new
# name on the left of `:=` in rename() does the same job.
for (iloop in 1:5) {
  # NOTE(review): hh_temp is meant to be (re)read here based on iloop; the
  # read step is not shown in this example.
  hh_temp_summ <- hh_temp %>%
    group_by(maz) %>%
    summarise(num_hh = n()) %>% # one row per maz with its row count
    rename(!!paste0('num_hh', iloop) := num_hh) # num_hh -> num_hh1, num_hh2, ...
  # Store the summary under a per-iteration name: hhSumm1 ... hhSumm5.
  assign(paste0('hhSumm', iloop), hh_temp_summ)
}
# rowwise() makes mutate() evaluate its expressions one row at a time, which
# is useful when the function does not accept vectors. Here max() is taken
# over Sepal.Length and Petal.Length separately for every row of iris.
mutate(
  rowwise(iris),
  Max.Len = max(Sepal.Length, Petal.Length)
)
```