-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from bobaekang/master
Updating the fp repo
- Loading branch information
Showing
8 changed files
with
12,275 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,16 @@ | ||
# Final project | ||
|
||
# Instruction---------------------------------------# | ||
This is the repository for your final project. It is up to you to decide how to structure your files and directories. The final result will be a website created using [R Markdown](http://rmarkdown.rstudio.com/rmarkdown_websites.html). | ||
|
||
You can see the rendered example site [here](https://uc-cfss.github.io/fp-template/). | ||
You can see the rendered example site [here](https://uc-cfss.github.io/fp-template/). | ||
#---------------------------------------------------# | ||
|
||
# Summary of my project-----------------------------# | ||
Summary of my project in words will be provided here. | ||
#---------------------------------------------------# | ||
|
||
# Scripts-------------------------------------------# | ||
A brief explanation of each script file will be provided here. | ||
#---------------------------------------------------# | ||
|
536 changes: 536 additions & 0 deletions
536
data/Divvy_Trips_2016_Q1Q2/Divvy_Stations_2016_Q1Q2.csv
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Download the raw data | ||
# Load library | ||
library(downloader) | ||
library(stringr) | ||
|
||
# Fetch the raw archives ----
# Source URL of each archive, keyed by the local path it is saved under.
archive_urls <- c(
  # Divvy data
  "data/Divvy_Trips_2016_Q1Q2.zip" =
    "https://s3.amazonaws.com/divvy-data/tripdata/Divvy_Trips_2016_Q1Q2.zip",
  # CTA data: stops and schedule
  "data/google_transit.zip" =
    "http://www.transitchicago.com/downloads/sch_data/google_transit.zip"
)

for (dest_path in names(archive_urls)) {
  download(url = archive_urls[[dest_path]], destfile = dest_path)
}

# Unpack every archive found in data/ ----
# full.names = TRUE already yields "data/<archive>.zip", so no manual path
# pasting is needed before handing the file to unzip().
zip_paths <- list.files("data", pattern = "\\.zip$", full.names = TRUE)

for (zip_path in zip_paths) {
  unzip(zip_path, exdir = "data")
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
##---------------------------------------------------------------## | ||
## This script reads Divvy and CTA data into R and tidies them, in ## | ||
## order to create a tidy data frame for Divvy trips with spatial ## | ||
## variable, which classifies all trips into four different ## | ||
## groups, based on stations from and to which each trip was ## | ||
## made and on whether those stations are in proximity with ## | ||
## any CTA stop. ## | ||
##---------------------------------------------------------------## | ||
|
||
# Load libraries | ||
library(tidyverse) | ||
library(feather) | ||
|
||
## READ AND TRANSFORM THE CTA DATA
## Read the two CTA datasets on public transit stops and stop times, and
## combine them.
CTAStops <- read_csv("data/stops.txt")
CTAStopTimes <- read_csv("data/stop_times.txt")

# Attach the stop attributes to every stop time, then drop the less
# meaningful variables.
CTAStopTimeLocation <- CTAStopTimes %>%
  left_join(CTAStops, by = "stop_id") %>%
  select(
    -stop_sequence, -stop_headsign, -shape_dist_traveled,
    -stop_code, -stop_desc, -wheelchair_boarding
  )

# Write the outcome into a feather file and store it
write_feather(CTAStopTimeLocation, "data/CTA_Stop_time_location.feather")


## READ AND TRANSFORM THE DIVVY DATA
## Read and join two Divvy datasets: 1) Divvy trips and 2) locations of
## Divvy stations.
DivvyStationFile <- "data/Divvy_Trips_2016_Q1Q2/Divvy_Stations_2016_Q1Q2.csv"

# Every CSV in the Divvy folder except the station file is a trip file.
# Selecting by name (rather than by position 2:5) keeps this correct even if
# the directory listing order changes.
DivvyAllFiles <- list.files(
  "data/Divvy_Trips_2016_Q1Q2",
  pattern = "\\.csv$",
  full.names = TRUE
)
DivvyTripFiles <- DivvyAllFiles[
  basename(DivvyAllFiles) != basename(DivvyStationFile)
]

# Read all trip files and stack them into one data frame
DivvyTrip <- DivvyTripFiles %>%
  map(read_csv) %>%
  bind_rows()

# Read the station file and give its columns short names
DivvyStation <- read_csv(DivvyStationFile)
colnames(DivvyStation) <- c(
  "id", "name", "lat", "lon", "dpcapacity", "online_date"
)

## Add a proximity variable to the station data
# Coordinates as (lon, lat) matrices, one row per station / stop
Divvy_m <- cbind(DivvyStation$lon, DivvyStation$lat)
CTA_m <- cbind(CTAStops$stop_lon, CTAStops$stop_lat)

# Pairwise distances: rows are Divvy stations, columns are CTA stops.
# geosphere is not attached by this script, so its functions are
# namespace-qualified.
distance_m <- geosphere::distm(
  Divvy_m, CTA_m,
  fun = geosphere::distHaversine
)

# TRUE where a station/stop pair is within 150 meters (approx. 0.1 mile)
distance150 <- distance_m <= 150
# 1 if a Divvy station is <= 150m from any CTA stop; otherwise 0
proximity150 <- (rowSums(distance150) > 0) * 1
DivvyStation$proximity <- proximity150

# (row, col) positions of all station/stop pairs within 150 meters,
# translated from matrix positions to the ids used in `DivvyStation` and
# `CTAStops`. The lookup is vectorised so that every pair is converted.
near_pairs <- which(distance150, arr.ind = TRUE)
index150 <- tibble(
  id = DivvyStation$id[near_pairs[, 1]],
  stop_id = CTAStops$stop_id[near_pairs[, 2]]
)

# Inspect the stations together with their nearby CTA stops
test <- left_join(DivvyStation, index150, by = "id")
print(test, n = 30)

## Add spatial variables: latitude and longitude of from and to stations
FromStation <- DivvyStation %>%
  select(
    from_station_id = id, from_lon = lon,
    from_lat = lat, from_prox = proximity
  )

ToStation <- DivvyStation %>%
  select(
    to_station_id = id, to_lon = lon,
    to_lat = lat, to_prox = proximity
  )

DivvyData_from <- left_join(DivvyTrip, FromStation, by = "from_station_id")
DivvyData <- left_join(DivvyData_from, ToStation, by = "to_station_id")

## Split starttime and stoptime into date and time-of-day columns
# Parse the raw "m/d/Y H:M" strings, then render them in one fixed
# "Y-m-d H:M:S" layout. Relying on the implicit character conversion of a
# date-time column can omit the seconds or the time part, which would break
# the split below and the later hms() parsing.
parse_divvy_time <- function(x) {
  parsed <- as.POSIXct(x, format = "%m/%d/%Y %H:%M", tz = "America/Chicago")
  format(parsed, "%Y-%m-%d %H:%M:%S")
}
DivvyData$starttime <- parse_divvy_time(DivvyData$starttime)
DivvyData$stoptime <- parse_divvy_time(DivvyData$stoptime)

DivvyData <- DivvyData %>%
  separate(starttime, c("starttime_ymd", "starttime_hms"), sep = " ") %>%
  separate(stoptime, c("stoptime_ymd", "stoptime_hms"), sep = " ")

# Write the outcome into feather files and store them
write_feather(DivvyData, "data/Divvy_clean.feather")
write_feather(DivvyStation, "data/Divvy_station.feather")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
##---------------------------------------------------------------## | ||
## This script further transforms Divvy and CTA data in order to ## | ||
## add the temporal variable, which classifies all Divvy trips ## | ||
## into two different groups; ones likely to be multi-modal and ## | ||
## the others that are not. ## | ||
##---------------------------------------------------------------## | ||
|
||
# Load libraries | ||
library(tidyverse) | ||
library(feather) | ||
library(lubridate) | ||
|
||
# Load the tidied inputs produced by the earlier scripts ----
DivvyData <- read_feather("data/Divvy_clean.feather")
CTAStopTimeLocation <- read_feather("data/CTA_Stop_time_location.feather")

# Trips that STARTED at a station close to a CTA stop ----
# Only the departure side of each trip is kept.
DivvyData_from_prox <- DivvyData %>%
  filter(from_prox == 1) %>%
  select(
    -stoptime_ymd, -stoptime_hms,
    -to_station_id, -to_station_name,
    -to_lon, -to_lat, -to_prox
  )
DivvyData_from_prox[["starttime_ymd"]] <-
  ymd(DivvyData_from_prox[["starttime_ymd"]])
DivvyData_from_prox[["starttime_hms"]] <-
  hms(DivvyData_from_prox[["starttime_hms"]])

# Trips that ENDED at a station close to a CTA stop ----
# Only the arrival side of each trip is kept.
DivvyData_to_prox <- DivvyData %>%
  filter(to_prox == 1) %>%
  select(
    -starttime_ymd, -starttime_hms,
    -from_station_id, -from_station_name,
    -from_lon, -from_lat, -from_prox
  )
DivvyData_to_prox[["stoptime_ymd"]] <-
  ymd(DivvyData_to_prox[["stoptime_ymd"]])
DivvyData_to_prox[["stoptime_hms"]] <-
  hms(DivvyData_to_prox[["stoptime_hms"]])

# CTA stop times, split into an arrivals table and a departures table ----
CTAStop_arr <- select(CTAStopTimeLocation, -departure_time)
CTAStop_arr[["arrival_time"]] <- hms(CTAStop_arr[["arrival_time"]])

CTAStop_dep <- select(CTAStopTimeLocation, -arrival_time)
CTAStop_dep[["departure_time"]] <- hms(CTAStop_dep[["departure_time"]])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
##---------------------------------------------------------------## | ||
## This script reads Divvy station and CTA stop data into R, in ## | ||
## order to create a tidy data frame for Divvy Station data frame ## | ||
## with spatial variable. This is a mini script for the Issue 02 ## | ||
##---------------------------------------------------------------## | ||
|
||
library(tidyverse) | ||
library(feather) | ||
|
||
# Read the CTA stop file
CTAStops <- read_csv("data/stops.txt")

# Read the Divvy station file and give its columns short names
DivvyStation <- read_csv(
  "data/Divvy_Trips_2016_Q1Q2/Divvy_Stations_2016_Q1Q2.csv"
)
colnames(DivvyStation) <- c(
  "id", "name", "lat", "lon", "dpcapacity", "online_date"
)

## Add a proximity variable to the station data
# Coordinates as (lon, lat) matrices, one row per station / stop
Divvy_m <- cbind(DivvyStation$lon, DivvyStation$lat)
CTA_m <- cbind(CTAStops$stop_lon, CTAStops$stop_lat)

# Pairwise distances: rows are Divvy stations, columns are CTA stops.
# geosphere is not attached by this script, so its functions are
# namespace-qualified.
distance_m <- geosphere::distm(
  Divvy_m, CTA_m,
  fun = geosphere::distHaversine
)

# TRUE where a station/stop pair is within 150 meters (approx. 0.1 mile)
distance150 <- distance_m <= 150
# 1 if a Divvy station is <= 150m from any CTA stop; otherwise 0
proximity150 <- (rowSums(distance150) > 0) * 1
DivvyStation$proximity <- proximity150

# (row, col) positions of all station/stop pairs within 150 meters,
# translated from matrix positions to the ids used in `DivvyStation` and
# `CTAStops`. The lookup is vectorised so that every pair is converted.
near_pairs <- which(distance150, arr.ind = TRUE)
index150 <- tibble(
  id = DivvyStation$id[near_pairs[, 1]],
  stop_id = CTAStops$stop_id[near_pairs[, 2]]
)

# Inspect the stations together with their nearby CTA stops
test <- left_join(DivvyStation, index150, by = "id")
print(test, n = 30)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
##---------------------------------------------------------------## | ||
## This script creates the necessary directories to store key ## | ||
## outputs of the project and runs all the scripts in order so ## | ||
## that they download, transform, and analyse the data as well ## | ||
## as visualize the analysis in a presentable format. ## | ||
##---------------------------------------------------------------## | ||
|
||
## Start from a clean slate: wipe and recreate each working directory
paths <- c("data", "graphics", "output")

invisible(lapply(paths, function(dir_name) {
  unlink(dir_name, recursive = TRUE) # remove the folder and everything in it
  dir.create(dir_name)               # put an empty folder back in its place
}))

## Run the project scripts in order
pipeline_scripts <- c(
  "fb-00_download-data.R",
  "fb-01_tidy-data1.R"
)
for (script_file in pipeline_scripts) {
  source(script_file)
}
# rmarkdown::render("index.Rmd")
# rmarkdown::render("about.Rmd")