Skip to content

Commit

Permalink
Merge branch 'master' into 3.1
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow authored Nov 28, 2023
2 parents 66c5d4e + 190717f commit e22ebd1
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 38 deletions.
5 changes: 4 additions & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ authors:
- given-names: Chengqi
family-names: Lu
email: [email protected]
orcid: 'https://orcid.org/0000-0002-1930-4675'
affiliation: Technische Universität Berlin
- given-names: Christian
family-names: Rakow
email: [email protected]
affiliation: Technische Universität Berlin
orcid: 'https://orcid.org/0000-0003-3760-2243'
- given-names: Simon
family-names: Meinhardt
email: [email protected]
orcid: 'https://orcid.org/0000-0002-6354-4529'
affiliation: Technische Universität Berlin
- given-names: Kai
family-names: Nagel
Expand All @@ -34,5 +37,5 @@ url: "https://github.com/matsim-scenarios/matsim-kelheim"
doi: 10.5281/zenodo.8322240
date-released: 2023-09-06
year: 2023
version: 3.0
version: 3.1
license: AGPL-3.0
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,35 @@ library(ggplot2)
library(plotly)
library(hrbrthemes)
library(geosphere)

Sys.setlocale("LC_TIME", "en_US.UTF-8")
#####################################################################
####################################################
### INPUT DEFINITIONS ###

# set working directory
#setwd("D:/svn/shared-svn/projects/KelRide/data/KEXI/")
setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")
setwd("D:/Module/vsp/shared-svn/")
#setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")

# read data
VIArides2021 <- read.csv2("VIA_Rides_202106_202201.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2022_1 <- read.csv2("VIA_Rides_202201_202210.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2022_2 <- read.csv2("VIA_Rides_202210_202212.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2023_1 <- read.csv2("VIA_Rides_202212_202303.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2023_2 <- read.csv2("VIA_Rides_202304_202307.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2023_3 <- read.csv2("VIA_Rides_202307_202310.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")

VIAridesAll <- union(VIArides2021, VIArides2022_1)
VIAridesAll <- union(VIAridesAll, VIArides2022_2)
VIAridesAll <- union(VIAridesAll, VIArides2023_1)
VIAridesAll <- union(VIAridesAll, VIArides2023_2) %>%
VIAridesAll <- union(VIAridesAll, VIArides2023_2)
VIAridesAll <- union(VIAridesAll, VIArides2023_3) %>%
filter(!is.na(Actual.Pickup.Time))

VIAridesSince2022 <- VIAridesAll %>%
filter(year(Actual.Pickup.Time) >= year(ymd("2022-01-01")))

datasets <- list(VIArides2021, VIArides2022_1, VIArides2022_2, VIArides2023_1, VIArides2023_2, VIAridesSince2022, VIAridesAll)
names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIAdataSince2022","VIAdataAll")
datasets <- list(VIArides2021, VIArides2022_1, VIArides2022_2, VIArides2023_1, VIArides2023_2, VIArides2023_3, VIAridesSince2022, VIAridesAll)
names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIA_data_202307_202310","VIAdataSince2022","VIAdataAll")
i <- 1

avgValues <- setNames(data.frame(matrix(ncol = 14, nrow = 0)), c("dataset", "avgBookingsPerDay", "avgDistance_<5km[m]", "avgDistance_withoutFilter[m]", "avgTravelTime[s]",
Expand Down Expand Up @@ -70,10 +72,10 @@ for(dataset in datasets) {
# write.csv2(noPUTime, "VIA_Rides_202106_202201_noPUTime.csv", quote = FALSE)

weekdayRides <- dataset %>%
filter(weekday != "Fr",
weekday != "Sa",
weekday != "So",
weekday != "Mo")
filter(weekday != "Fri",
weekday != "Sat",
weekday != "Sun",
weekday != "Mon")

# Possibly add a lockdown in late 2021 / early 2022 here,
# although the "low periods" observed in the "Zeitverlauf der Fahrten pro Tag (VIA)" plot seem to be explainable by holiday periods (Christmas and summer)
Expand Down Expand Up @@ -190,7 +192,8 @@ for(dataset in datasets) {

hist_TravelTime_s <- ggplot(j, aes(x=travelTime_s)) +
geom_histogram() +
labs(title=paste("Histogram of KEXI travel time for dataset", names[i]))
labs(title=paste("Histogram of KEXI travel time for dataset", names[i])) +
theme(plot.title = element_text(hjust=0.5, size = 10))

plotFile = paste0("plots/",names[i],"/hist_KEXI_travel_time_s.png")
paste0("printing plot to ", plotFile)
Expand All @@ -206,8 +209,8 @@ for(dataset in datasets) {
width=5, colour="red") +
labs(x="", y="travel time [s]", title=paste("Boxplot KEXI Travel Time for dataset", names[i])) +
# labs(x="", y="travel time [s]") + #for paper only
theme(plot.title = element_text(hjust=0.5, size=20, face="bold"), axis.text.y = element_text(size=24),
axis.title.y = element_text(size=25, face="bold"))
theme(plot.title = element_text(hjust=0.5, size=10, face="bold"), axis.text.y = element_text(size=8),
axis.title.y = element_text(size=15, face="bold"))

plotFile = paste0("plots/",names[i],"/boxplot_KEXI_travel_time_s.png")
paste0("printing plot to ", plotFile)
Expand All @@ -230,7 +233,8 @@ for(dataset in datasets) {

hist_distance_m <- ggplot(k, aes(x=distance_m)) +
geom_histogram() +
labs(title=paste("Histogram of KEXI travel distance for dataset", names[i]))
labs(title=paste("Histogram of KEXI travel distance for dataset", names[i])) +
theme(plot.title = element_text(hjust=0.5, size = 10))

plotFile = paste0("plots/",names[i],"/hist_KEXI_travel_distance_m.png")
paste0("printing plot to ", plotFile)
Expand All @@ -245,8 +249,8 @@ for(dataset in datasets) {
width=5, colour="red") +
labs(x="", y="travel distance [m]", title=paste("Boxplot KEXI Travel Distance for dataset", names[i])) +
# labs(x="", y="travel distance [m]") + #for paper only
theme(plot.title = element_text(hjust=0.5, size=20, face="bold"), axis.text.y = element_text(size=24),
axis.title.y = element_text(size=25, face="bold"))
theme(plot.title = element_text(hjust=0.5, size=10, face="bold"), axis.text.y = element_text(size=8),
axis.title.y = element_text(size=15, face="bold"))

plotFile = paste0("plots/",names[i],"/boxplot_KEXI_travel_distance_m.png")
paste0("printing plot to ", plotFile)
Expand Down Expand Up @@ -303,8 +307,8 @@ for(dataset in datasets) {
width=5, colour="red") +
labs(x="", y="bookings", title=paste("Boxplot KEXI bookings per day for dataset", names[i])) +
# labs(x="", y="travel distance [m]") + #for paper only
theme(plot.title = element_text(hjust=0.5, size=20, face="bold"), axis.text.y = element_text(size=24),
axis.title.y = element_text(size=25, face="bold"))
theme(plot.title = element_text(hjust=0.5, size=10, face="bold"), axis.text.y = element_text(size=8),
axis.title.y = element_text(size=15, face="bold"))

plotFile = paste0("plots/",names[i],"/boxplot_KEXI_daily_bookings.png")
paste0("printing plot to ", plotFile)
Expand Down
47 changes: 38 additions & 9 deletions src/main/R/drtDemandAnalysis/VIA-data/KEXI-merge-data-VIA.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
library(lubridate)
library(tidyverse)
library(dplyr)
Sys.setlocale("LC_TIME", "en_US.UTF-8")

# set working directory
setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")
#setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")
setwd("D:/Module/vsp/shared-svn/")

# read data
VIAdata2021 <- read.csv2("Via_data_2022-02-08/Data_request_TUB_for_Kelheim-Actual_Data-VIA_raw.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2022_1 <- read.csv2("Via_data_2022-10-10/Data_request_TUB_for_Kelheim-Actual_Data-VIA_Feb_to_Oct_2022_raw.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2022_2 <- read.csv2("Via_data_2023-01-17/Data_request_TUB_for_Kelheim-Actual_Data-Oct-Dec_2022-Data_TUB_for_Kelheim-Actual_Data-Oct_to_Dec_22.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2023_1 <- read.csv2("Via_data_2023-04-19/Data_request_TUB_for_Kelheim-Actual_Data-Jan-Mar_2023-Kelheim-Actual_Data-Jan-Mar_2023.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2023_2 <- read.csv2("Via_data_2023-07-10/Data_request_TUB_for_Kelheim-Actual_Data-Apr-Jul_2023-Kelheim-Actual_Data-Apr-Jul_23.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2023_3 <- read.csv2("Via_data_2023-10-24/Data_request_TUB_for_Kelheim-Actual_Data-Jul-Oct-2023-Kelheim-Actual_Data_Jul-Oct(1)_raw.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)

# here it makes sense to switch to column names from 2022 data and newer as
# column names for all files but the 2021 data are the same
Expand Down Expand Up @@ -52,16 +55,32 @@ VIAdata2023_2 <- VIAdata2023_2 %>%
Reason.For.Travel = ifelse(Reason.For.Travel != "AV","DR","AV"),
Request.Creation.Time = ymd_hms(Request.Creation.Time))

# Retrofit the Reason.For.Travel column of the newest data set from the
# provider-name column, which read.csv2 delivered as X.1 (presumably because its
# header cell is empty in the raw file -- TODO confirm against the CSV).
# NOTE: this statement can only be executed once per session: afterwards X.1 has
# been renamed and the X / Anbietername columns have been removed, so a second
# run fails on the rename.
VIAdata2023_3 <- VIAdata2023_3 %>%
rename("Anbietername" = X.1) %>%
# provider "RBO" is mapped to "DR" and "no vendor" to "AV";
# every other row keeps its existing Reason.For.Travel value
mutate(Reason.For.Travel = case_when(
Anbietername == "RBO" ~ "DR",
Anbietername == "no vendor" ~ "AV",
TRUE ~ Reason.For.Travel
)) %>%
# blank out Ride.ID, collapse every non-NA value other than "AV" into "DR"
# (NA stays NA through ifelse) and parse the request timestamp
mutate(Ride.ID = NA,
Reason.For.Travel = ifelse(Reason.For.Travel != "AV","DR","AV"),
Request.Creation.Time = ymd_hms(Request.Creation.Time)) %>%
# drop the helper columns that are no longer needed after the retrofit
select(-X,-Anbietername)

write.csv2(VIAdata2021, "Via_data_2022-02-08/Data_request_TUB_for_Kelheim-Actual_Data-VIA_edited.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2022_1, "Via_data_2022-10-10/Data_request_TUB_for_Kelheim-Actual_Data-VIA_Feb_to_Oct_2022_edited_cleaned.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2022_2, "Via_data_2023-01-17/Data_request_TUB_for_Kelheim-Actual_Data-Oct-Dec_2022-Data_TUB_for_Kelheim-Actual_Data-Oct_to_Dec_22_edited.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2023_1, "Via_data_2023-04-19/Data_request_TUB_for_Kelheim-Actual_Data-Jan-Mar_2023-Kelheim-Actual_Data-Jan-Mar_2023_edited.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2023_2, "Via_data_2023-07-10/Data_request_TUB_for_Kelheim-Actual_Data-Apr-Jul_2023-Kelheim-Actual_Data-Apr-Jul_23_edited.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2023_3, "Via_data_2023-10-24/Data_request_TUB_for_Kelheim-Actual_Data-Jul-Oct_2023-Kelheim-Actual_Data-Jul-Oct_23_edited.csv", quote = FALSE, row.names = FALSE)

allData <- union(VIAdata2021, VIAdata2022_1)
allData <- union(allData, VIAdata2022_2)
allData <- union(allData, VIAdata2023_1)
allData <- union(allData, VIAdata2023_2) %>%
allData <- union(allData, VIAdata2023_2)
allData <- union(allData, VIAdata2023_3) %>%
distinct(Request.ID, .keep_all = TRUE)

#filter
Expand All @@ -83,35 +102,43 @@ completedRides2023_1 <- VIAdata2023_1 %>%
completedRides2023_2 <- VIAdata2023_2 %>%
filter(Request.Status == "Completed")

saturday_rides <- completedRides %>%
completedRides2023_3 <- VIAdata2023_3 %>%
filter(Request.Status == "Completed")

saturday_rides <- completedRides %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2021 <- completedRides2021 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2022_1 <- completedRides2022_1 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2022_2 <- completedRides2022_2 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2023_1 <- completedRides2023_1 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2023_2 <- completedRides2023_2 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

# Saturday subset of the completed rides from the newest data set (written to
# VIA_Rides_Saturdays_202307_202310.csv below).
# wday(label = TRUE) returns locale-dependent weekday abbreviations, so the
# comparison with "Sat" relies on LC_TIME having been switched to an English
# locale via Sys.setlocale() at the top of this script.
saturday_rides2023_3 <- completedRides2023_3 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sat")

#dump output
write.csv2(completedRides, "VIA_Rides_202106_202303.csv", quote = FALSE, row.names = FALSE)
Expand All @@ -126,4 +153,6 @@ write.csv2(completedRides2023_1, "VIA_Rides_202212_202303.csv", quote = FALSE, r
write.csv2(saturday_rides2023_1, "VIA_Rides_Saturdays_202212_202303.csv", quote = FALSE, row.names = FALSE)
write.csv2(completedRides2023_2, "VIA_Rides_202304_202307.csv", quote = FALSE, row.names = FALSE)
write.csv2(saturday_rides2023_2, "VIA_Rides_Saturdays_202304_202307.csv", quote = FALSE, row.names = FALSE)
write.csv2(completedRides2023_3, "VIA_Rides_202307_202310.csv", quote = FALSE, row.names = FALSE)
write.csv2(saturday_rides2023_3, "VIA_Rides_Saturdays_202307_202310.csv", quote = FALSE, row.names = FALSE)

Loading

0 comments on commit e22ebd1

Please sign in to comment.