Skip to content

Commit

Permalink
KEXI data update July-October
Browse files Browse the repository at this point in the history
  • Loading branch information
xinxin-git committed Oct 31, 2023
1 parent 6b20a9b commit 5d65e23
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,29 @@ library(geosphere)
### INPUT DEFINITIONS ###

# set working directory
#setwd("D:/svn/shared-svn/projects/KelRide/data/KEXI/")
setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")
setwd("D:/Module/vsp/shared-svn/")
#setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")

# read data
VIArides2021 <- read.csv2("VIA_Rides_202106_202201.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2022_1 <- read.csv2("VIA_Rides_202201_202210.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2022_2 <- read.csv2("VIA_Rides_202210_202212.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2023_1 <- read.csv2("VIA_Rides_202212_202303.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2023_2 <- read.csv2("VIA_Rides_202304_202307.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIArides2023_3 <- read.csv2("VIA_Rides_202307_202310.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")

VIAridesAll <- union(VIArides2021, VIArides2022_1)
VIAridesAll <- union(VIAridesAll, VIArides2022_2)
VIAridesAll <- union(VIAridesAll, VIArides2023_1)
VIAridesAll <- union(VIAridesAll, VIArides2023_2) %>%
VIAridesAll <- union(VIAridesAll, VIArides2023_2)
VIAridesAll <- union(VIAridesAll, VIArides2023_3) %>%
filter(!is.na(Actual.Pickup.Time))

# Keep only rides whose actual pickup falls in calendar year 2022 or later.
VIAridesSince2022 <- filter(VIAridesAll, year(Actual.Pickup.Time) >= 2022)

datasets <- list(VIArides2021, VIArides2022_1, VIArides2022_2, VIArides2023_1, VIArides2023_2, VIAridesSince2022, VIAridesAll)
names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIAdataSince2022","VIAdataAll")
datasets <- list(VIArides2021, VIArides2022_1, VIArides2022_2, VIArides2023_1, VIArides2023_2, VIArides2023_3, VIAridesSince2022, VIAridesAll)
names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIA_data_202307_202310","VIAdataSince2022","VIAdataAll")
i <- 1

avgValues <- setNames(data.frame(matrix(ncol = 14, nrow = 0)), c("dataset", "avgBookingsPerDay", "avgDistance_<5km[m]", "avgDistance_withoutFilter[m]", "avgTravelTime[s]",
Expand Down
47 changes: 38 additions & 9 deletions src/main/R/drtDemandAnalysis/VIA-data/KEXI-merge-data-VIA.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
library(lubridate)
library(tidyverse)
library(dplyr)
Sys.setlocale("LC_TIME", "en_US.UTF-8")

# set working directory
setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")
#setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")
setwd("D:/Module/vsp/shared-svn/")

# read data
# Load the raw VIA exports, one CSV per Via_data_* folder. Each file is read as
# comma-separated (sep = "," overrides read.csv2's default ";"), and skip = 1
# drops the first line of the file so that the following line is used as header.
# NOTE(review): read.csv2 keeps dec = "," while sep is also ","; values written
# with a decimal point would likely be imported as character - confirm intended.
VIAdata2021 <- read.csv2("Via_data_2022-02-08/Data_request_TUB_for_Kelheim-Actual_Data-VIA_raw.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2022_1 <- read.csv2("Via_data_2022-10-10/Data_request_TUB_for_Kelheim-Actual_Data-VIA_Feb_to_Oct_2022_raw.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2022_2 <- read.csv2("Via_data_2023-01-17/Data_request_TUB_for_Kelheim-Actual_Data-Oct-Dec_2022-Data_TUB_for_Kelheim-Actual_Data-Oct_to_Dec_22.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2023_1 <- read.csv2("Via_data_2023-04-19/Data_request_TUB_for_Kelheim-Actual_Data-Jan-Mar_2023-Kelheim-Actual_Data-Jan-Mar_2023.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2023_2 <- read.csv2("Via_data_2023-07-10/Data_request_TUB_for_Kelheim-Actual_Data-Apr-Jul_2023-Kelheim-Actual_Data-Apr-Jul_23.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)
VIAdata2023_3 <- read.csv2("Via_data_2023-10-24/Data_request_TUB_for_Kelheim-Actual_Data-Jul-Oct-2023-Kelheim-Actual_Data_Jul-Oct(1)_raw.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1)

# It makes sense to standardize on the column names of the 2022-and-newer files,
# because every file except the 2021 export already uses them.
Expand Down Expand Up @@ -52,16 +55,32 @@ VIAdata2023_2 <- VIAdata2023_2 %>%
Reason.For.Travel = ifelse(Reason.For.Travel != "AV","DR","AV"),
Request.Creation.Time = ymd_hms(Request.Creation.Time))

# Retrofit the Reason.For.Travel column from the provider name held in X.1:
# "RBO" maps to "DR", "no vendor" maps to "AV", any other provider keeps the
# existing value; afterwards every value other than "AV" is collapsed to "DR".
# Ride.ID is filled with NA and Request.Creation.Time is parsed via ymd_hms().
# This statement is not re-runnable: once executed, X.1 has been renamed and
# both X and Anbietername have been dropped from the data frame.
VIAdata2023_3 <- VIAdata2023_3 %>%
  rename(Anbietername = X.1) %>%
  mutate(
    Reason.For.Travel = case_when(
      Anbietername == "RBO" ~ "DR",
      Anbietername == "no vendor" ~ "AV",
      TRUE ~ Reason.For.Travel
    ),
    Ride.ID = NA,
    Reason.For.Travel = ifelse(Reason.For.Travel != "AV", "DR", "AV"),
    Request.Creation.Time = ymd_hms(Request.Creation.Time)
  ) %>%
  select(-c(X, Anbietername))

write.csv2(VIAdata2021, "Via_data_2022-02-08/Data_request_TUB_for_Kelheim-Actual_Data-VIA_edited.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2022_1, "Via_data_2022-10-10/Data_request_TUB_for_Kelheim-Actual_Data-VIA_Feb_to_Oct_2022_edited_cleaned.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2022_2, "Via_data_2023-01-17/Data_request_TUB_for_Kelheim-Actual_Data-Oct-Dec_2022-Data_TUB_for_Kelheim-Actual_Data-Oct_to_Dec_22_edited.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2023_1, "Via_data_2023-04-19/Data_request_TUB_for_Kelheim-Actual_Data-Jan-Mar_2023-Kelheim-Actual_Data-Jan-Mar_2023_edited.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2023_2, "Via_data_2023-07-10/Data_request_TUB_for_Kelheim-Actual_Data-Apr-Jul_2023-Kelheim-Actual_Data-Apr-Jul_23_edited.csv", quote = FALSE, row.names = FALSE)
write.csv2(VIAdata2023_3, "Via_data_2023-10-24/Data_request_TUB_for_Kelheim-Actual_Data-Jul-Oct_2023-Kelheim-Actual_Data-Jul-Oct_23_edited.csv", quote = FALSE, row.names = FALSE)

allData <- union(VIAdata2021, VIAdata2022_1)
allData <- union(allData, VIAdata2022_2)
allData <- union(allData, VIAdata2023_1)
allData <- union(allData, VIAdata2023_2) %>%
allData <- union(allData, VIAdata2023_2)
allData <- union(allData, VIAdata2023_3) %>%
distinct(Request.ID, .keep_all = TRUE)

#filter
Expand All @@ -83,35 +102,43 @@ completedRides2023_1 <- VIAdata2023_1 %>%
# Requests from the VIAdata2023_2 export whose status is "Completed".
completedRides2023_2 <- filter(VIAdata2023_2, Request.Status == "Completed")

saturday_rides <- completedRides %>%
# Requests from the VIAdata2023_3 export whose status is "Completed".
completedRides2023_3 <- filter(VIAdata2023_3, Request.Status == "Completed")

saturday_rides <- completedRides %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2021 <- completedRides2021 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2022_1 <- completedRides2022_1 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2022_2 <- completedRides2022_2 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2023_1 <- completedRides2023_1 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

saturday_rides2023_2 <- completedRides2023_2 %>%
mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>%
mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>%
filter(weekday == "Sa")
filter(weekday == "Sat")

# Saturday subset of the completed rides in the VIAdata2023_3 export.
# The weekday label is compared against the English abbreviation "Sat", so the
# result depends on the LC_TIME locale in effect when wday() is evaluated.
saturday_rides2023_3 <- completedRides2023_3 %>%
  mutate(
    Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time),
    weekday = wday(Actual.Pickup.Time, label = TRUE)
  ) %>%
  filter(weekday == "Sat")

#dump output
write.csv2(completedRides, "VIA_Rides_202106_202303.csv", quote = FALSE, row.names = FALSE)
Expand All @@ -126,4 +153,6 @@ write.csv2(completedRides2023_1, "VIA_Rides_202212_202303.csv", quote = FALSE, r
write.csv2(saturday_rides2023_1, "VIA_Rides_Saturdays_202212_202303.csv", quote = FALSE, row.names = FALSE)
write.csv2(completedRides2023_2, "VIA_Rides_202304_202307.csv", quote = FALSE, row.names = FALSE)
write.csv2(saturday_rides2023_2, "VIA_Rides_Saturdays_202304_202307.csv", quote = FALSE, row.names = FALSE)
write.csv2(completedRides2023_3, "VIA_Rides_202307_202310.csv", quote = FALSE, row.names = FALSE)
write.csv2(saturday_rides2023_3, "VIA_Rides_Saturdays_202307_202310.csv", quote = FALSE, row.names = FALSE)

Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,30 @@ library(hrbrthemes)
### INPUT DEFINITIONS ###

# set working directory
setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")
#setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/")
setwd("D:/Module/vsp/shared-svn/")

# read data
VIAdata2021 <- read.csv2("Via_data_2022-02-08/Data_request_TUB_for_Kelheim-Actual_Data-VIA_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIAdata2022_1 <- read.csv2("Via_data_2022-10-10/Data_request_TUB_for_Kelheim-Actual_Data-VIA_Feb_to_Oct_2022_edited_cleaned.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIAdata2022_2 <- read.csv2("Via_data_2023-01-17/Data_request_TUB_for_Kelheim-Actual_Data-Oct-Dec_2022-Data_TUB_for_Kelheim-Actual_Data-Oct_to_Dec_22_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIAdata2023_1 <- read.csv2("Via_data_2023-04-19/Data_request_TUB_for_Kelheim-Actual_Data-Jan-Mar_2023-Kelheim-Actual_Data-Jan-Mar_2023_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIAdata2023_2 <- read.csv2("Via_data_2023-07-10/Data_request_TUB_for_Kelheim-Actual_Data-Apr-Jul_2023-Kelheim-Actual_Data-Apr-Jul_23_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")
VIAdata2023_3 <- read.csv2("Via_data_2023-10-24/Data_request_TUB_for_Kelheim-Actual_Data-Jul-Oct_2023-Kelheim-Actual_Data-Jul-Oct_23_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="")


VIAdataAll <- union(VIAdata2021, VIAdata2022_1)
VIAdataAll <- union(VIAdataAll, VIAdata2022_2)
VIAdataAll <- union(VIAdataAll, VIAdata2023_1)
VIAdataAll <- union(VIAdataAll, VIAdata2023_2) %>%
VIAdataAll <- union(VIAdataAll, VIAdata2023_2)
VIAdataAll <- union(VIAdataAll, VIAdata2023_3) %>%
distinct(Request.ID, .keep_all = TRUE)

# Keep only requests whose actual pickup falls in calendar year 2022 or later.
VIAdataSince2022 <- filter(VIAdataAll, year(Actual.Pickup.Time) >= 2022)

datasets <- list(VIAdata2021, VIAdata2022_1, VIAdata2022_2, VIAdata2023_1, VIAdata2023_2, VIAdataSince2022, VIAdataAll)
names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIAdataSince2022","VIAdataAll")
datasets <- list(VIAdata2021, VIAdata2022_1, VIAdata2022_2, VIAdata2023_1, VIAdata2023_2,VIAdata2023_3, VIAdataSince2022, VIAdataAll)
names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIA_data_202307_202310","VIAdataSince2022","VIAdataAll")
i <- 1

print("Starting to print different plots!")
Expand Down

0 comments on commit 5d65e23

Please sign in to comment.