diff --git a/.Rbuildignore b/.Rbuildignore index a655369..ae0d771 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ +^renv$ +^renv\.lock$ ^.*\.Rproj$ ^\.Rproj\.user$ ^LICENSE\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 09aefbd..c0d5fbd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,13 @@ Type: Package Package: tc.sensors Title: Retrieve Loop Detector Data from the MnDOT JSON Feed -Version: 0.2.3 -Date: 2023-07-21 +Version: 0.2.4 +Date: 2024-05-01 Authors@R: c( person("Metropolitan Council", role = "cph"), person("Liz", "Roten", , "liz.roten@metc.state.mn.us", role = c("cre", "aut"), comment = c(ORCID = "0000-0002-5346-3549")), - person("Nicole", "Sullivan", , "nicole.sullivan@metc.state.mn.us", role = "aut"), + person("Nicole", "Sullivan", , "nicole.sullivan@metc.state.mn.us", role = "ctb"), person("Ashley", "Asmus", , "ashley.asmus@metc.state.mn.us", role = "ctb", comment = c(ORCID = "0000-0001-5505-1372")), person("Yaxuan", "Zhang", , "yaxuan.zhang@metc.state.mn.us", role = "ctb", @@ -15,9 +15,9 @@ Authors@R: c( ) Description: Process data collected from Minnesota Department of Transportation (MnDOT) loop detectors installed on the Minnesota - Freeway system in 30-second interval measurements of occupancy and - volume, data which are pushed daily to a public JSON feed. Occupancy - and volume data can be used to calculate speed and delay. + Freeway system. Data are published to a public JSON feed in 30-second + interval measurements of occupancy and volume. Occupancy and volume + data can be used to calculate speed and delay. License: MIT + file LICENSE BugReports: https://github.com/Metropolitan-Council/tc.sensors/issues Imports: @@ -33,6 +33,7 @@ Imports: tibble (>= 3.2.1), tidyr (>= 1.3.1), tis (>= 1.39), + units (>= 0.8-5), utils, xml2 (>= 1.3.6) Suggests: diff --git a/NAMESPACE b/NAMESPACE index 8ece094..b0f6b7d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -56,6 +56,7 @@ importFrom(tidyr,separate) importFrom(tidyr,unite) importFrom(tis,day) importFrom(tis,isHoliday) +importFrom(units,as_units) importFrom(units,set_units) importFrom(utils,download.file) importFrom(utils,read.csv) diff --git a/R/aggregate_sensor.R b/R/aggregate_sensor.R index 60b9d97..fff452a 100644 --- a/R/aggregate_sensor.R +++ b/R/aggregate_sensor.R @@ -39,13 +39,16 @@ #' With 3,600 scans per minute, 60 minutes per hour there are 216,000 scans per hour. #' To find the number of scans in 15 minutes, we can multiply 0.25 * 216000 = 54,000 scans. #' -#' Speed is calculate by multiplying +#' Speed, in miles per hour, is calculate by multiplying the number of +#' vehicles per hour by field length in miles, divided by the +#' occupancy for the given interval. #' #' ## Impossible values #' #' Any observation with a volume that exceeds 20 vehicles or an occupancy that exceeds 1,800 scans #' will be replaced with `NA`. It is impossible for more than twenty vehicles to pass over a sensor -#' in only 30 seconds, and the maximum number of scans in 30 seconds is 1,800 (60 scans/second * 30 seconds). +#' in only 30 seconds, and the maximum number of scans in 30 seconds is 1,800 +#' (60 scans/second * 30 seconds). #' #' ### Interpolating missing values #' @@ -58,6 +61,7 @@ #' #' @import data.table #' @importFrom cli cli_abort +#' @importFrom units set_units as_units #' #' @examples #' \dontrun{ @@ -80,7 +84,9 @@ #' config = config_sample #' ) #' } -aggregate_sensor <- function(sensor_data, config, interval_length, +aggregate_sensor <- function(sensor_data, + config, + interval_length, replace_impossible = TRUE, interpolate_missing = FALSE, occupancy_pct_threshold = 0.0020) { @@ -95,10 +101,9 @@ aggregate_sensor <- function(sensor_data, config, interval_length, if (length(unique(sensor_data$date)) <= 1) { cli::cli_abort("For intervals greater than 24 hours, you must have data for more than one date") } - } - if (nrow(sensor_data) != 2880 * length(unique(sensor_data$date))) { + if (nrow(sensor_data) != (2880 * length(unique(sensor_data$date)))) { cli::cli_abort("For multiple dates, you must have at least 2,880 rows for each date you want covered.") } @@ -111,8 +116,14 @@ aggregate_sensor <- function(sensor_data, config, interval_length, config <- data.table::as.data.table(config)[detector_name == sensor_data$sensor[[1]]] # number of scans in the given interval length + # 3600 * 60 = 216,000 scans per hour interval_scans <- interval_length * 216000 field_length <- as.numeric(config[, "detector_field"][[1]]) + # convert field length from feet to miles + field_length_miles <- field_length %>% + units::as_units("feet") %>% + units::set_units("miles") %>% + as.numeric() if (replace_impossible == TRUE) { sensor_data <- tc.sensors::replace_impossible( @@ -121,29 +132,33 @@ aggregate_sensor <- function(sensor_data, config, interval_length, ) if (interpolate_missing == TRUE) { - sensor_data <- sensor_data[ , `:=`(volume.rollmean = data.table::shift( # calculate rolling average volume data.table::frollapply(volume, 3, mean, - align = "center", - na.rm = TRUE, - hasNA = TRUE - )) - ), - by = .(sensor)][ - , volume := ifelse(is.na(volume), volume.rollmean, volume)][ - # calculate rolling average occupancy - , `:=`(occupancy.rollmean = data.table::shift( - data.table::frollapply(occupancy, 3, mean, - align = "center", - na.rm = TRUE, - hasNA = TRUE - ) - )), - by = .(sensor)][ - , occupancy := ifelse(is.na(occupancy), occupancy.rollmean, occupancy)][ - , .(volume, occupancy, date, sensor, hour, min)] + align = "center", + na.rm = TRUE, + hasNA = TRUE + ) + )), + by = .(sensor) + ][ + , volume := ifelse(is.na(volume), volume.rollmean, volume) + ][ + # calculate rolling average occupancy + , `:=`(occupancy.rollmean = data.table::shift( + data.table::frollapply(occupancy, 3, mean, + align = "center", + na.rm = TRUE, + hasNA = TRUE + ) + )), + by = .(sensor) + ][ + , occupancy := ifelse(is.na(occupancy), occupancy.rollmean, occupancy) + ][ + , .(volume, occupancy, date, sensor, hour, min) + ] } } @@ -151,7 +166,7 @@ aggregate_sensor <- function(sensor_data, config, interval_length, interval_length_min <- interval_length * 60 n_rows_expected <- interval_length_min * 2 # two scans/observations per minute - # there are 60 scans/second + # there are 60 scans/second in the raw data # 60 * 30 = 1,800 scans/ 30 sec (the interval we are given) # 60 * 60 = 3,600 scans/minute # 3600 * 60 = 216,000 scans per hour @@ -165,10 +180,11 @@ aggregate_sensor <- function(sensor_data, config, interval_length, # add minute bins to sensor_data sensor_data[ - , interval_min_bin := findInterval(sensor_data$min, bins)][ - , start_min := min(min), - by = .(date, hour, interval_min_bin) - ] + , interval_min_bin := findInterval(sensor_data$min, bins) + ][ + , start_min := min(min), + by = .(date, hour, interval_min_bin) + ] sensor_data_agg <- sensor_data[ , as.list(unlist(lapply(.SD, function(x) { @@ -182,33 +198,38 @@ aggregate_sensor <- function(sensor_data, config, interval_length, ) }))), by = .(date, hour, start_min, interval_min_bin, sensor), - .SDcols = c("volume", "occupancy")][ - , start_datetime := as.character(as.POSIXct( - paste(date, hour, start_min), - format = "%Y-%m-%d %H %M"))][ - , occupancy.pct := (occupancy.sum / interval_scans)][ - # if volume is not 0 and occupancy.pct is gte occupancy threshold - , speed := ifelse(volume.sum != 0 & occupancy.pct >= occupancy_pct_threshold, - # speed is volume * interval length in hours * field length - # divided by occupancy * feet to miles conversion - (volume.sum * (60 / interval_length_min) * field_length) / (5280 * occupancy.pct), - # otherwise, NA - NA - ) - ] - + .SDcols = c("volume", "occupancy") + ][ + , start_datetime := as.character(as.POSIXct( + paste(date, hour, start_min), + format = "%Y-%m-%d %H %M" + )) + ][ + , occupancy.pct := (occupancy.sum / interval_scans) + ][ + , speed := ifelse( + # if volume is not 0 and occupancy.pct is gte occupancy threshold + volume.sum != 0 & occupancy.pct >= occupancy_pct_threshold, + # speed is volume * number of interval periods in a single hour * field length in miles + # divided by occupancy + (volume.sum * (60 / interval_length_min) * field_length_miles) / occupancy.pct, + # otherwise, NA + NA + ) + ] } else { # if the interval length is greater than or equal to 1 hour - # create hour bins bins <- seq(0, 24, interval_length) # add hour bins to sensor_data sensor_data[ - , date := data.table::as.IDate(date)][ - , year := data.table::year(date)][ - , interval_bin := findInterval(sensor_data$hour, bins) - ] + , date := data.table::as.IDate(date) + ][ + , year := data.table::year(date) + ][ + , interval_bin := findInterval(sensor_data$hour, bins) + ] # order by date data.table::setorder(sensor_data, date) @@ -225,20 +246,27 @@ aggregate_sensor <- function(sensor_data, config, interval_length, ) }))), by = .(date, interval_bin, sensor), - .SDcols = c("volume", "occupancy")][ - # if total occupancy is gte the number of interval scans, make NA - , occupancy.sum := ifelse(occupancy.sum >= interval_scans, NA, occupancy.sum)][ - # calculate occupancy percent of all interval scans - , occupancy.pct := (occupancy.sum / interval_scans)][ - # if total volume is not 0 and occupancy is occupancy.pct is gte occupancy threshold - , speed := ifelse(volume.sum != 0 & occupancy.pct >= occupancy_pct_threshold, - # calculate speed - ((volume.sum * field_length) / - (5280 * occupancy.pct)) / interval_length, - # otherwise, speed is NA - NA - ) - ] + .SDcols = c("volume", "occupancy") + ][ + # if total occupancy is gte the number of interval scans, make NA + , occupancy.sum := ifelse(occupancy.sum >= interval_scans, NA, occupancy.sum) + ][ + # calculate occupancy percent of all interval scans + , occupancy.pct := (occupancy.sum / interval_scans) + ][ + , speed := ifelse( + # if volume is not 0 and occupancy percent meets threshold value, + volume.sum != 0 & occupancy.pct >= occupancy_pct_threshold, + # calculate speed + # volume * field length in miles + # divided by occupancy + # all over the interval length in hours + ((volume.sum * field_length_miles) / + (occupancy.pct)) / interval_length, + # otherwise, speed is NA + NA + ) + ] } return(sensor_data_agg) diff --git a/man/aggregate_sensor.Rd b/man/aggregate_sensor.Rd index 74c44bb..88c7cab 100644 --- a/man/aggregate_sensor.Rd +++ b/man/aggregate_sensor.Rd @@ -62,7 +62,9 @@ With 60 scans per second, 60 seconds per minute there are 3,600 scans per minute With 3,600 scans per minute, 60 minutes per hour there are 216,000 scans per hour. To find the number of scans in 15 minutes, we can multiply 0.25 * 216000 = 54,000 scans. -Speed is calculate by multiplying +Speed, in miles per hour, is calculate by multiplying the number of +vehicles per hour by field length in miles, divided by the +occupancy for the given interval. }\if{html}{\out{}} } @@ -70,7 +72,8 @@ Speed is calculate by multiplying \if{html}{\out{