Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
aba-sah authored Dec 13, 2023
1 parent 190f32f commit 52e8bea
Show file tree
Hide file tree
Showing 3 changed files with 322 additions and 148 deletions.
157 changes: 84 additions & 73 deletions code/r/cycling_and_walking/markdown/base/common-traffic_counters.r
Original file line number Diff line number Diff line change
Expand Up @@ -32,40 +32,41 @@ cop_cycling_theme <-
)



parseCounterDataFromDB <-
function(counterData, glimpseContent = FALSE) {

counterData <- counterData %>%
mutate_at(vars(siteID, site, Location, countInterval, traffic_mode, Provider), as.factor) %>%
mutate(across(c(siteID, site, Location, countInterval, traffic_mode, Provider), as.factor)) %>%
relocate(Latitude, .before = Longitude) %>% # correction to order, will have no impact if not needed
relocate(Provider, .before = siteID) %>%
mutate_at(vars(fromDate, toDate), as_datetime) %>%
mutate(across(c(fromDate, toDate), as_datetime)) %>%

mutate(date = as_date(map_chr(str_split(localTimestamp, "T"), 1))) %>%
mutate(time = format(map_chr(str_split(localTimestamp, "T"), 2), format = "%H:%M:S"),
time = str_sub(time, 1, 5),
across(time, ~ str_sub(., 1, 5)),
hour = as.ordered(hour(as_datetime(map_chr(str_split(localTimestamp, "T"), 2), format = "%H:%M:%S"))),
year = as.ordered(year(date)),
month = month(date, label = TRUE),
weekday = wday(date, label = TRUE),
isWeekEnd = (as.integer(weekday) %in% c(1, 7)), #between(as.integer(weekday), 6, 7)) %>%
)
)


if (glimpseContent)
glimpse(counterData)

invisible(counterData)
invisible(counterData)
}


loadAndParseCounterData <-
function(pathToFile, provider = NULL, glimpseContent = FALSE) {

print(paste0("Parsing file '", pathToFile, "' ..."))


data_loaded <- read_csv(pathToFile, trim_ws = T) %>%
data_loaded <- read_csv(pathToFile, trim_ws = T) %>%
filter(rowSums(is.na(.)) != ncol(.))

if (is_null(provider))
Expand All @@ -77,20 +78,20 @@ loadAndParseCounterData <-
getMetadata(pathToFile),


data_loaded %>%
data_loaded %>%

pivot_longer(!time, names_to = "date", values_to = "count") %>%
separate("date", c(NA, "date"), sep = "\\s*\\n") %>%

mutate(date = parse_date(date, "%d-%b-%Y")) %>%
mutate(weekday = wday(date, label = TRUE),
month = month(date, label = TRUE),
year = year(date),
year = year(date),
Provider = provider) %>%

mutate_at(vars(count), as.integer) %>%
mutate_at(vars(weekday, month, year), as.factor) %>%
mutate(isWeekEnd = (as.integer(weekday) %in% c(1, 7))) %>% #between(as.integer(weekday), 6, 7)) %>%
mutate(across(count, as.integer),
across(c(weekday, month, year), as.factor),
isWeekEnd = (as.integer(weekday) %in% c(1, 7))) %>% #between(as.integer(weekday), 6, 7)) %>%
relocate(isWeekEnd, .after = "weekday") %>%

select(-c(count, Provider), everything(), count, Provider)
Expand All @@ -100,7 +101,7 @@ loadAndParseCounterData <-
if (glimpseContent)
glimpse(data_loaded)

invisible(data_loaded)
invisible(data_loaded)
}


Expand All @@ -125,35 +126,34 @@ getMetadata <-
}

metadata <- metadata %>%
mutate_at(vars(site, siteID, traffic_mode), as.factor) %>%
mutate(across(c(site, siteID, traffic_mode), as.factor)) %>%
select(-c(month, year))


metadata
metadata
}



# data and file structure differences in output to JSON from API
loadAndParseJsonCounterData <-
function(pathTofile, localAuthorityData, breakDownDates = FALSE, provider = NULL, glimpseContent = FALSE) {

print(paste0("Parsing file '", pathTofile, "' ..."))

counter_data <- fromJSON(pathTofile)
counter_data <- counter_data$crossingCountPerTimeInterval
counter_data <- fromJSON(pathTofile)
counter_data <- counter_data$crossingCountPerTimeInterval


if (is_null(provider))
provider <- default_provider

counter_data <- counter_data %>%
mutate(date = as_date(map_chr(str_split(localTimestamp, "T"), 1)),
mutate(date = as_date(map_chr(str_split(localTimestamp, "T"), 1)),
Provider = provider)

if (breakDownDates) {
counter_data <- counter_data %>%
#mutate_at("localTimestamp", as_datetime)
#mutate(across("localTimestamp", as_datetime))
#mutate(date = as_date(str_split(localTimestamp, "T", simplify = TRUE)[1])) %>% #bthis works, but just switching to map for consistency
#mutate(date = as_date(map_chr(str_split(localTimestamp, "T"), 1))) %>% # need it as interim regardless

Expand All @@ -164,22 +164,22 @@ loadAndParseJsonCounterData <-
month = month(date, label = TRUE),
weekday = wday(date, label = TRUE),
isWeekEnd = (as.integer(weekday) %in% c(1, 7)), #between(as.integer(weekday), 6, 7)) %>%
)
)
}


counter_data <- bind_cols(getMetadataFromJson(basename(pathTofile)), counter_data)
counter_data <- bind_cols(getMetadataFromJson(basename(pathTofile)), counter_data)

counter_data <- localAuthorityData %>%
#select(c(siteID, site, Location, RoadName, Latitude, Longitude)) %>%
select(-c(status, externalId, AltRoadName)) %>%
select(- c(status, externalId, AltRoadName)) %>%

right_join(counter_data, by = c("siteID" = "siteID")) %>%

# run filter, then remove interim columns
filter(((traffic_mode == "bicycle") & (date >= CycleCounter)) |
((traffic_mode == "pedestrian") & (date >= PedestrianCounter))) %>%
select(-c(LocalAuthority, RoadNumber, RoadType, CycleCounter, PedestrianCounter)) %>%
filter(((traffic_mode == "bicycle") & (date >= CycleCounter)) |
((traffic_mode == "pedestrian") & (date >= PedestrianCounter))) %>%
select(- c(LocalAuthority, RoadNumber, RoadType, CycleCounter, PedestrianCounter)) %>%
select(-Provider, everything(), Provider)


Expand All @@ -192,7 +192,7 @@ loadAndParseJsonCounterData <-
if (glimpseContent)
glimpse(counter_data)

invisible(counter_data)
invisible(counter_data)
}


Expand All @@ -212,15 +212,16 @@ getMetadataFromJson <-

metadata <- metadata %>%
map_df(function(x) {gsub("site", "", x)}) %>%
mutate_at(vars(everything()), ~ na_if(., "")) %>%
mutate(fromDate = parse_date_time(str_pad(fromDate, 8, "left", 0), "%d%m%Y"),
mutate(across(everything(), ~ na_if(., "")),
fromDate = parse_date_time(str_pad(fromDate, 8, "left", 0), "%d%m%Y"),
toDate = parse_date_time(str_pad(toDate, 8, "left", 0), "%d%m%Y")) %>%
rename(traffic_mode = vehicleClass) %>%
mutate_at(vars(traffic_mode), ~ tolower(.)) %>%
mutate_at(vars(siteID, countInterval, traffic_mode, trafficDirection, laneId), as.factor)
mutate(across(c(traffic_mode), tolower),
across(c(siteID, countInterval, traffic_mode, trafficDirection, laneId), as.factor)
)

metadata
}
metadata
}


loadAndParseMeteoData <-
Expand All @@ -235,10 +236,10 @@ loadAndParseMeteoData <-
filter(rowSums(is.na(.)) != ncol(.))

historical_weather <- historical_weather %>%
mutate_at(vars(year), as.integer) %>%
mutate(across(year, as.integer)) %>%
select(c(year, all_of(str_to_lower(month.abb)))) %>%

mutate_at(vars(!matches("year")), as.numeric) %>% # just in case any issues reading in
mutate(across(!matches("year")), as.numeric) %>% # just in case any issues reading in
rename_if(is.double, str_to_title) %>%

pivot_longer(!year, names_to = "month", values_to = metric) %>%
Expand All @@ -248,7 +249,7 @@ loadAndParseMeteoData <-
separate(metric, c("statistic", "metric"), sep = "_", fill = "left") %>%
relocate(metric, .before = statistic)


historical_weather <- historical_weather %>%
mutate(monthOfYear = parse_date(paste0(month, "-", year), format = "%b-%Y")) %>%

Expand Down Expand Up @@ -292,9 +293,9 @@ loadAndParseMeteoStationData <-
mutate_if(negate(is.numeric), parse_number) %>%

filter(year >= year(startDateFilter))%>%
mutate_at(vars(year), as.ordered) %>%

mutate_at(vars(month), ~ month(., label = TRUE)) %>%
mutate(across(year, as.ordered),
across(month, ~ month(., label = TRUE))
) %>%

pivot_longer(!c(year, month), names_to = "metric", values_to = "value") %>%
separate(metric, c("statistic", "metric"), sep = "_", fill = "left") %>%
Expand All @@ -304,8 +305,9 @@ loadAndParseMeteoStationData <-
filter(monthOfYear %within% interval(startDateFilter, endDateFilter)) %>%

mutate(region = region,
weather_station = weather_station) %>%
mutate_at(vars(region, weather_station, metric, statistic), as.factor) %>%
weather_station = weather_station,
across(c(region, weather_station, metric, statistic), as.factor)
) %>%
relocate(c(monthOfYear, region, weather_station), .after = month)


Expand All @@ -331,13 +333,13 @@ parseMeteoDataFromDB <-
function(historicalWeatherData, startDateFilter = NULL, endDateFilter = NULL, glimpseContent = FALSE) {

historicalWeatherData <- historicalWeatherData %>%
mutate_at(vars(year), as.integer) %>%
mutate_at(vars(year), as.ordered) %>%
mutate(month = ordered(month, levels = month.abb)) %>%

mutate_at(vars(region, weather_station, metric, statistic), as.factor)
mutate(across(year, as.integer),
across(year, as.ordered),
across(month, ~ ordered(., levels = month.abb)),
across(c(region, weather_station, metric, statistic), as.factor)
) %>%



historicalWeatherData <- historicalWeatherData %>%
mutate(monthOfYear = parse_date(paste0(month, "-", year), format = "%b-%Y")) %>%
relocate(monthOfYear, .after = month)
Expand All @@ -360,24 +362,23 @@ parseMeteoDataFromDB <-
}



loadAndParseTrafficSurveyData <-
function(pathTofile, localAuthorityData, countInterval = "quarter_hour", breakDownDates = FALSE, selectColumns = NULL, glimpseContent = FALSE) {

print(paste0("Parsing file '", pathTofile, "' ..."))


data_loaded <- read_csv(pathTofile, trim_ws = T, na = c("N/A", "No data", "No Data")) %>%
filter(rowSums(is.na(.)) != ncol(.))

data_loaded <- data_loaded %>%
mutate_at(vars(Code), ~ str_remove(., "Site\\s+")) %>%
mutate(across(Code, ~ str_remove(., "Site\\s+"))) %>%
#rename_with(., ~ (gsub("/|_|\\s+", "", .x)))
rename_with(., ~ (gsub("[^a-zA-Z]", "", .x))) #more general




join_params <- c("Code" = "SWSiteID")
if ("Settlement" %in% names(data_loaded)) # Site in quarter-hourly data, Settlement in hourly ..
join_params <- c("Settlement" = "LocationPlace", join_params)
Expand All @@ -398,10 +399,11 @@ loadAndParseTrafficSurveyData <-
data_loaded <- data_loaded %>%

mutate(TimePeriod = format(StartDateTime, format = "%H:%M"),
TimePeriod = str_sub(TimePeriod, end = 5),
across(TimePeriod, ~ str_sub(., end = 5)),
TimePeriodEnd = format(EndDateTime, format = "%H:%M"),
TimePeriodEnd = str_sub(TimePeriodEnd, end = 5),
TimePeriod = paste(TimePeriod, "-", TimePeriodEnd)) %>%
across(TimePeriodEnd, ~ str_sub(., end = 5)),
across(TimePeriod, ~ paste(., "-", TimePeriodEnd))
) %>%
select(-TimePeriodEnd) %>%
relocate(TimePeriod, .before = StartDateTime)
}
Expand All @@ -418,7 +420,9 @@ loadAndParseTrafficSurveyData <-
select(LocalAuthority, LocationPlace, SWSiteID, RoadNumber, RoadType),
by = join_params
) %>%
mutate(CountPeriod = paste0(month(Date, label = TRUE), "-", year(Date))) %>%
mutate(CountPeriod = paste0(month(Date, label = TRUE), "-", year(Date)),
across(CountPeriod, ~ fct_reorder(., Date)),
) %>%
select(CountPeriod, everything())


Expand All @@ -431,6 +435,10 @@ loadAndParseTrafficSurveyData <-
data_loaded <- data_loaded %>%
rename_with(~ c("RoadName", "Location"), c("Street", "Settlement"))
}
if (sum(c("LocationRoad", "Settlement") %in% names(data_loaded)) == 2) {
data_loaded <- data_loaded %>%
rename_with(~ c("RoadName", "Location"), c("LocationRoad", "Settlement"))
}
if ("Area" %in% names(data_loaded)) { # matches LocalAuthority

data_loaded <- data_loaded %>%
Expand All @@ -444,8 +452,9 @@ loadAndParseTrafficSurveyData <-


data_loaded <- data_loaded %>%
mutate(CountPeriodAsDate = parse_datetime(as.character(CountPeriod), format = "%b-%Y")) %>%
mutate(CountPeriod = fct_reorder(CountPeriod, CountPeriodAsDate)) %>%
mutate(CountPeriodAsDate = parse_datetime(as.character(CountPeriod), format = "%b-%Y"),
across(CountPeriod = fct_reorder(., CountPeriodAsDate))
) %>%
select(-CountPeriodAsDate)


Expand All @@ -472,22 +481,23 @@ loadAndParseTrafficSurveyData <-

data_loaded <- data_loaded %>%

mutate_at(intersect(names(data_loaded), params_as_count), as.integer) %>%
mutate(across(intersect(names(data_loaded), params_as_count), as.integer)) %>%
pivot_longer(all_of(names(transportation_modes)), names_to = "TransportationMode", values_to = "count") %>%
mutate_at(intersect(colnames(.), params_as_factor), as.factor) %>% #- doesn't include new columns
mutate(across(intersect(colnames(.), params_as_factor), as.factor)) %>% #- doesn't include new columns

relocate(LocalAuthority, .before = Location) %>%
relocate(c(RoadNumber, RoadType), .after = RoadName)


if (sum(c("Latitude", "Longitude") %in% names(data_loaded)) < 2) # at least one not set
data_loaded <- data_loaded %>%

mutate(Latitude = NA,
Longitude = NA) %>%
mutate_at(vars(Latitude, Longitude), as.double)
Longitude = NA,
across(c(Latitude, Longitude), as.double)
)



if (!is_null(selectColumns))
data_loaded <- data_loaded %>%
select(all_of(selectColumns))
Expand All @@ -500,19 +510,21 @@ loadAndParseTrafficSurveyData <-
}



parseTrafficSurveyDataFromDB <-
function(trafficSurveyData, breakDownDates = FALSE, glimpseContent = FALSE) {

trafficSurveyData <- trafficSurveyData %>%
select(- (any_of(traffic_direction_variables) & where(~ sum(is.na(.)) == nrow(trafficSurveyData)))) %>%
mutate_at(vars(CountPeriod, Code, LocalAuthority, Location, RoadName, RoadNumber, RoadType, TimePeriod, countInterval, TransportationMode), as.factor) %>%
mutate_at(vars(Date), as_date) %>%
mutate_at(vars(StartDateTime, EndDateTime), as_datetime)
mutate(across(c(CountPeriod, Code, LocalAuthority, Location, RoadName, RoadNumber, RoadType,
TimePeriod, countInterval, TransportationMode), as.factor),
across(Date, as_date),
across(c(StartDateTime, EndDateTime), as_datetime)
)

trafficSurveyData <- trafficSurveyData %>%
mutate(CountPeriodAsDate = parse_datetime(as.character(CountPeriod), format = "%b-%Y")) %>%
mutate(CountPeriod = fct_reorder(CountPeriod, CountPeriodAsDate)) %>%
mutate(CountPeriodAsDate = parse_datetime(as.character(CountPeriod), format = "%b-%Y"),
across(CountPeriod, ~ fct_reorder(., CountPeriodAsDate))
) %>%
select(-CountPeriodAsDate)


Expand All @@ -536,4 +548,3 @@ parseTrafficSurveyDataFromDB <-

invisible(trafficSurveyData)
}

Loading

0 comments on commit 52e8bea

Please sign in to comment.