From 5d65e238fec1febfb8dacbb20a76c26c7d7c0d54 Mon Sep 17 00:00:00 2001 From: xinxin-git <82400603+xinxin-git@users.noreply.github.com> Date: Tue, 31 Oct 2023 13:09:29 +0100 Subject: [PATCH] KEXI data update July-October --- .../KEXI-analysis-for-calibration-VIA.R | 12 +++-- .../VIA-data/KEXI-merge-data-VIA.R | 47 +++++++++++++++---- .../VIA-data/KEXI-plot-time-series-VIA.R | 11 +++-- 3 files changed, 52 insertions(+), 18 deletions(-) diff --git a/src/main/R/drtDemandAnalysis/VIA-data/KEXI-analysis-for-calibration-VIA.R b/src/main/R/drtDemandAnalysis/VIA-data/KEXI-analysis-for-calibration-VIA.R index 0928d4e9..f5e95682 100644 --- a/src/main/R/drtDemandAnalysis/VIA-data/KEXI-analysis-for-calibration-VIA.R +++ b/src/main/R/drtDemandAnalysis/VIA-data/KEXI-analysis-for-calibration-VIA.R @@ -16,8 +16,8 @@ library(geosphere) ### INPUT DEFINITIONS ### # set working directory -#setwd("D:/svn/shared-svn/projects/KelRide/data/KEXI/") -setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/") +setwd("D:/Module/vsp/shared-svn/") +#setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/") # read data VIArides2021 <- read.csv2("VIA_Rides_202106_202201.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") @@ -25,18 +25,20 @@ VIArides2022_1 <- read.csv2("VIA_Rides_202201_202210.csv", stringsAsFactors = FA VIArides2022_2 <- read.csv2("VIA_Rides_202210_202212.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") VIArides2023_1 <- read.csv2("VIA_Rides_202212_202303.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") VIArides2023_2 <- read.csv2("VIA_Rides_202304_202307.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") +VIArides2023_3 <- read.csv2("VIA_Rides_202307_202310.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") VIAridesAll <- union(VIArides2021, VIArides2022_1) VIAridesAll <- union(VIAridesAll, VIArides2022_2) VIAridesAll <- union(VIAridesAll, VIArides2023_1) -VIAridesAll <- union(VIAridesAll, VIArides2023_2) %>% +VIAridesAll <- union(VIAridesAll, VIArides2023_2) +VIAridesAll <- union(VIAridesAll, VIArides2023_3) %>% filter(!is.na(Actual.Pickup.Time)) VIAridesSince2022 <- VIAridesAll %>% filter(year(Actual.Pickup.Time) >= year(ymd("2022-01-01"))) -datasets <- list(VIArides2021, VIArides2022_1, VIArides2022_2, VIArides2023_1, VIArides2023_2, VIAridesSince2022, VIAridesAll) -names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIAdataSince2022","VIAdataAll") +datasets <- list(VIArides2021, VIArides2022_1, VIArides2022_2, VIArides2023_1, VIArides2023_2, VIArides2023_3, VIAridesSince2022, VIAridesAll) +names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIA_data_202307_202310","VIAdataSince2022","VIAdataAll") i <- 1 avgValues <- setNames(data.frame(matrix(ncol = 14, nrow = 0)), c("dataset", "avgBookingsPerDay", "avgDistance_<5km[m]", "avgDistance_withoutFilter[m]", "avgTravelTime[s]", diff --git a/src/main/R/drtDemandAnalysis/VIA-data/KEXI-merge-data-VIA.R b/src/main/R/drtDemandAnalysis/VIA-data/KEXI-merge-data-VIA.R index 2cca48d1..7ba9e09b 100644 --- a/src/main/R/drtDemandAnalysis/VIA-data/KEXI-merge-data-VIA.R +++ b/src/main/R/drtDemandAnalysis/VIA-data/KEXI-merge-data-VIA.R @@ -1,9 +1,11 @@ library(lubridate) library(tidyverse) library(dplyr) +Sys.setlocale("LC_TIME", "en_US.UTF-8") # set working directory -setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/") +#setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/") +setwd("D:/Module/vsp/shared-svn/") # read data VIAdata2021 <- read.csv2("Via_data_2022-02-08/Data_request_TUB_for_Kelheim-Actual_Data-VIA_raw.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1) @@ -11,6 +13,7 @@ VIAdata2022_1 <- read.csv2("Via_data_2022-10-10/Data_request_TUB_for_Kelheim-Act VIAdata2022_2 <- read.csv2("Via_data_2023-01-17/Data_request_TUB_for_Kelheim-Actual_Data-Oct-Dec_2022-Data_TUB_for_Kelheim-Actual_Data-Oct_to_Dec_22.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1) VIAdata2023_1 <- read.csv2("Via_data_2023-04-19/Data_request_TUB_for_Kelheim-Actual_Data-Jan-Mar_2023-Kelheim-Actual_Data-Jan-Mar_2023.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1) VIAdata2023_2 <- read.csv2("Via_data_2023-07-10/Data_request_TUB_for_Kelheim-Actual_Data-Apr-Jul_2023-Kelheim-Actual_Data-Apr-Jul_23.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1) +VIAdata2023_3 <- read.csv2("Via_data_2023-10-24/Data_request_TUB_for_Kelheim-Actual_Data-Jul-Oct-2023-Kelheim-Actual_Data_Jul-Oct(1)_raw.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", sep=",", skip = 1) # here it makes sense to switch to column names from 2022 data and newer as # column names for all files but the 2021 data are the same @@ -52,16 +55,32 @@ VIAdata2023_2 <- VIAdata2023_2 %>% Reason.For.Travel = ifelse(Reason.For.Travel != "AV","DR","AV"), Request.Creation.Time = ymd_hms(Request.Creation.Time)) +#retrofit Reason.For.Travel column +#this line can only be executed once, afterwards columns are renamed and removed +VIAdata2023_3 <- VIAdata2023_3 %>% + rename("Anbietername" = X.1) %>% + mutate(Reason.For.Travel = case_when( + Anbietername == "RBO" ~ "DR", + Anbietername == "no vendor" ~ "AV", + TRUE ~ Reason.For.Travel + )) %>% + mutate(Ride.ID = NA, + Reason.For.Travel = ifelse(Reason.For.Travel != "AV","DR","AV"), + Request.Creation.Time = ymd_hms(Request.Creation.Time)) %>% + select(-X,-Anbietername) + write.csv2(VIAdata2021, "Via_data_2022-02-08/Data_request_TUB_for_Kelheim-Actual_Data-VIA_edited.csv", quote = FALSE, row.names = FALSE) write.csv2(VIAdata2022_1, "Via_data_2022-10-10/Data_request_TUB_for_Kelheim-Actual_Data-VIA_Feb_to_Oct_2022_edited_cleaned.csv", quote = FALSE, row.names = FALSE) write.csv2(VIAdata2022_2, "Via_data_2023-01-17/Data_request_TUB_for_Kelheim-Actual_Data-Oct-Dec_2022-Data_TUB_for_Kelheim-Actual_Data-Oct_to_Dec_22_edited.csv", quote = FALSE, row.names = FALSE) write.csv2(VIAdata2023_1, "Via_data_2023-04-19/Data_request_TUB_for_Kelheim-Actual_Data-Jan-Mar_2023-Kelheim-Actual_Data-Jan-Mar_2023_edited.csv", quote = FALSE, row.names = FALSE) write.csv2(VIAdata2023_2, "Via_data_2023-07-10/Data_request_TUB_for_Kelheim-Actual_Data-Apr-Jul_2023-Kelheim-Actual_Data-Apr-Jul_23_edited.csv", quote = FALSE, row.names = FALSE) +write.csv2(VIAdata2023_3, "Via_data_2023-10-24/Data_request_TUB_for_Kelheim-Actual_Data-Jul-Oct_2023-Kelheim-Actual_Data-Jul-Oct_23_edited.csv", quote = FALSE, row.names = FALSE) allData <- union(VIAdata2021, VIAdata2022_1) allData <- union(allData, VIAdata2022_2) allData <- union(allData, VIAdata2023_1) -allData <- union(allData, VIAdata2023_2) %>% +allData <- union(allData, VIAdata2023_2) +allData <- union(allData, VIAdata2023_3) %>% distinct(Request.ID, .keep_all = TRUE) #filter @@ -83,35 +102,43 @@ completedRides2023_1 <- VIAdata2023_1 %>% completedRides2023_2 <- VIAdata2023_2 %>% filter(Request.Status == "Completed") -saturday_rides <- completedRides %>% +completedRides2023_3 <- VIAdata2023_3 %>% + filter(Request.Status == "Completed") + +saturday_rides <- completedRides %>% mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>% mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>% - filter(weekday == "Sa") + filter(weekday == "Sat") saturday_rides2021 <- completedRides2021 %>% mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>% mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>% - filter(weekday == "Sa") + filter(weekday == "Sat") saturday_rides2022_1 <- completedRides2022_1 %>% mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>% mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>% - filter(weekday == "Sa") + filter(weekday == "Sat") saturday_rides2022_2 <- completedRides2022_2 %>% mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>% mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>% - filter(weekday == "Sa") + filter(weekday == "Sat") saturday_rides2023_1 <- completedRides2023_1 %>% mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>% mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>% - filter(weekday == "Sa") + filter(weekday == "Sat") saturday_rides2023_2 <- completedRides2023_2 %>% mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>% mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>% - filter(weekday == "Sa") + filter(weekday == "Sat") + +saturday_rides2023_3 <- completedRides2023_3 %>% + mutate(Actual.Pickup.Time = ymd_hms(Actual.Pickup.Time)) %>% + mutate(weekday = wday(Actual.Pickup.Time, label = TRUE)) %>% + filter(weekday == "Sat") #dump output write.csv2(completedRides, "VIA_Rides_202106_202303.csv", quote = FALSE, row.names = FALSE) @@ -126,4 +153,6 @@ write.csv2(completedRides2023_1, "VIA_Rides_202212_202303.csv", quote = FALSE, r write.csv2(saturday_rides2023_1, "VIA_Rides_Saturdays_202212_202303.csv", quote = FALSE, row.names = FALSE) write.csv2(completedRides2023_2, "VIA_Rides_202304_202307.csv", quote = FALSE, row.names = FALSE) write.csv2(saturday_rides2023_2, "VIA_Rides_Saturdays_202304_202307.csv", quote = FALSE, row.names = FALSE) +write.csv2(completedRides2023_3, "VIA_Rides_202307_202310.csv", quote = FALSE, row.names = FALSE) +write.csv2(saturday_rides2023_3, "VIA_Rides_Saturdays_202307_202310.csv", quote = FALSE, row.names = FALSE) diff --git a/src/main/R/drtDemandAnalysis/VIA-data/KEXI-plot-time-series-VIA.R b/src/main/R/drtDemandAnalysis/VIA-data/KEXI-plot-time-series-VIA.R index f36ad1ea..33d0902d 100644 --- a/src/main/R/drtDemandAnalysis/VIA-data/KEXI-plot-time-series-VIA.R +++ b/src/main/R/drtDemandAnalysis/VIA-data/KEXI-plot-time-series-VIA.R @@ -10,7 +10,8 @@ library(hrbrthemes) ### INPUT DEFINITIONS ### # set working directory -setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/") +#setwd("C:/Users/Simon/Documents/shared-svn/projects/KelRide/data/KEXI/") +setwd("D:/Module/vsp/shared-svn/") # read data VIAdata2021 <- read.csv2("Via_data_2022-02-08/Data_request_TUB_for_Kelheim-Actual_Data-VIA_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") @@ -18,19 +19,21 @@ VIAdata2022_1 <- read.csv2("Via_data_2022-10-10/Data_request_TUB_for_Kelheim-Act VIAdata2022_2 <- read.csv2("Via_data_2023-01-17/Data_request_TUB_for_Kelheim-Actual_Data-Oct-Dec_2022-Data_TUB_for_Kelheim-Actual_Data-Oct_to_Dec_22_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") VIAdata2023_1 <- read.csv2("Via_data_2023-04-19/Data_request_TUB_for_Kelheim-Actual_Data-Jan-Mar_2023-Kelheim-Actual_Data-Jan-Mar_2023_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") VIAdata2023_2 <- read.csv2("Via_data_2023-07-10/Data_request_TUB_for_Kelheim-Actual_Data-Apr-Jul_2023-Kelheim-Actual_Data-Apr-Jul_23_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") +VIAdata2023_3 <- read.csv2("Via_data_2023-10-24/Data_request_TUB_for_Kelheim-Actual_Data-Jul-Oct_2023-Kelheim-Actual_Data-Jul-Oct_23_edited.csv", stringsAsFactors = FALSE, header = TRUE, encoding = "UTF-8", na.strings="") VIAdataAll <- union(VIAdata2021, VIAdata2022_1) VIAdataAll <- union(VIAdataAll, VIAdata2022_2) VIAdataAll <- union(VIAdataAll, VIAdata2023_1) -VIAdataAll <- union(VIAdataAll, VIAdata2023_2) %>% +VIAdataAll <- union(VIAdataAll, VIAdata2023_2) +VIAdataAll <- union(VIAdataAll, VIAdata2023_3) %>% distinct(Request.ID, .keep_all = TRUE) VIAdataSince2022 <- VIAdataAll %>% filter(year(Actual.Pickup.Time) >= year(ymd("2022-01-01"))) -datasets <- list(VIAdata2021, VIAdata2022_1, VIAdata2022_2, VIAdata2023_1, VIAdata2023_2, VIAdataSince2022, VIAdataAll) -names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIAdataSince2022","VIAdataAll") +datasets <- list(VIAdata2021, VIAdata2022_1, VIAdata2022_2, VIAdata2023_1, VIAdata2023_2,VIAdata2023_3, VIAdataSince2022, VIAdataAll) +names <- c("VIA_data_202106_202201","VIA_data_202201_202210","VIA_data_202210_202212","VIA_data_202212_202303","VIA_data_202304_202307","VIA_data_202307_202310","VIAdataSince2022","VIAdataAll") i <- 1 print("Starting to print different plots!")