-
Notifications
You must be signed in to change notification settings - Fork 257
/
Copy pathcitibike.R
48 lines (30 loc) · 1.7 KB
/
citibike.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
library(tidyverse)
library(lubridate)
########################################
# READ AND TRANSFORM THE DATA
########################################
# read one month of data
trips <- read_csv('201402-citibike-tripdata.csv')
# replace spaces in column names with underscores
names(trips) <- gsub(' ', '_', names(trips))
# convert dates strings to dates
# trips <- mutate(trips, starttime = mdy_hms(starttime), stoptime = mdy_hms(stoptime))
# recode gender as a factor 0->"Unknown", 1->"Male", 2->"Female"
trips <- mutate(trips, gender = factor(gender, levels=c(0,1,2), labels = c("Unknown","Male","Female")))
########################################
# YOUR SOLUTIONS BELOW
########################################
# count the number of trips (= rows in the data frame)
# find the earliest and latest birth years (see help for max and min to deal with NAs)
# use filter and grepl to find all trips that either start or end on broadway
# do the same, but find all trips that both start and end on broadway
# find all unique station names
# count the number of trips by gender, the average trip time by gender, and the standard deviation in trip time by gender
# do this all at once, by using summarize() with multiple arguments
# find the 10 most frequent station-to-station trips
# find the top 3 end stations for trips starting from each start station
# find the top 3 most common station-to-station trips by gender
# find the day with the most trips
# tip: first add a column for year/month/day without time of day (use as.Date or floor_date from the lubridate package)
# compute the average number of trips taken during each of the 24 hours of the day across the entire month
# what time(s) of day tend to be peak hour(s)?