update metadata, documentation

Metropolitan-Council · May 1, 2024 · 444c87b · 444c87b
1 parent 52379c0
commit 444c87b
Show file tree

Hide file tree

Showing 7 changed files with 110 additions and 77 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,3 +1,5 @@
+^renv$
+^renv\.lock$
 ^.*\.Rproj$
 ^\.Rproj\.user$
 ^LICENSE\.md$

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,23 +1,23 @@
 Type: Package
 Package: tc.sensors
 Title: Retrieve Loop Detector Data from the MnDOT JSON Feed
-Version: 0.2.3
-Date: 2023-07-21
+Version: 0.2.4
+Date: 2024-05-01
 Authors@R: c(
     person("Metropolitan Council", role = "cph"),
     person("Liz", "Roten", , "[email protected]", role = c("cre", "aut"),
            comment = c(ORCID = "0000-0002-5346-3549")),
-    person("Nicole", "Sullivan", , "[email protected]", role = "aut"),
+    person("Nicole", "Sullivan", , "[email protected]", role = "ctb"),
     person("Ashley", "Asmus", , "[email protected]", role = "ctb",
            comment = c(ORCID = "0000-0001-5505-1372")),
     person("Yaxuan", "Zhang", , "[email protected]", role = "ctb",
            comment = c(ORCID = "0000-0003-2343-8307"))
   )
 Description: Process data collected from Minnesota Department of
     Transportation (MnDOT) loop detectors installed on the Minnesota
-    Freeway system in 30-second interval measurements of occupancy and
-    volume, data which are pushed daily to a public JSON feed.  Occupancy
-    and volume data can be used to calculate speed and delay.
+    Freeway system. Data are published to a public JSON feed in 30-second
+    interval measurements of occupancy and volume. Occupancy and volume
+    data can be used to calculate speed and delay.
 License: MIT + file LICENSE
 BugReports: https://github.com/Metropolitan-Council/tc.sensors/issues
 Imports: 
@@ -33,6 +33,7 @@ Imports:
     tibble (>= 3.2.1),
     tidyr (>= 1.3.1),
     tis (>= 1.39),
+    units (>= 0.8-5),
     utils,
     xml2 (>= 1.3.6)
 Suggests: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -56,6 +56,7 @@ importFrom(tidyr,separate)
 importFrom(tidyr,unite)
 importFrom(tis,day)
 importFrom(tis,isHoliday)
+importFrom(units,as_units)
 importFrom(units,set_units)
 importFrom(utils,download.file)
 importFrom(utils,read.csv)

diff --git a/R/aggregate_sensor.R b/R/aggregate_sensor.R
@@ -39,13 +39,16 @@
 #'     With 3,600 scans per minute, 60 minutes per hour there are 216,000 scans per hour.
 #'     To find the number of scans in 15 minutes, we can multiply 0.25 * 216000 = 54,000 scans.
 #'
-#'     Speed is calculate by multiplying
+#'     Speed, in miles per hour, is calculate by multiplying the number of
+#'     vehicles per hour by field length in miles, divided by the
+#'     occupancy for the given interval.
 #'
 #'   ## Impossible values
 #'
 #'     Any observation with a volume that exceeds 20 vehicles or an occupancy that exceeds 1,800 scans
 #'     will be replaced with `NA`. It is impossible for more than twenty vehicles to pass over a sensor
-#'     in only 30 seconds, and the maximum number of scans in 30 seconds is 1,800 (60 scans/second * 30 seconds).
+#'     in only 30 seconds, and the maximum number of scans in 30 seconds is 1,800
+#'     (60 scans/second * 30 seconds).
 #'
 #'   ### Interpolating missing values
 #'
@@ -58,6 +61,7 @@
 #'
 #' @import data.table
 #' @importFrom cli cli_abort
+#' @importFrom units set_units as_units
 #'
 #' @examples
 #' \dontrun{
@@ -80,7 +84,9 @@
 #'   config = config_sample
 #' )
 #' }
-aggregate_sensor <- function(sensor_data, config, interval_length,
+aggregate_sensor <- function(sensor_data,
+                             config,
+                             interval_length,
                              replace_impossible = TRUE,
                              interpolate_missing = FALSE,
                              occupancy_pct_threshold = 0.0020) {
@@ -95,10 +101,9 @@ aggregate_sensor <- function(sensor_data, config, interval_length,
     if (length(unique(sensor_data$date)) <= 1) {
       cli::cli_abort("For intervals greater than 24 hours, you must have data for more than one date")
     }
-
   }
 
-  if (nrow(sensor_data) != 2880 * length(unique(sensor_data$date))) {
+  if (nrow(sensor_data) != (2880 * length(unique(sensor_data$date)))) {
     cli::cli_abort("For multiple dates, you must have at least 2,880 rows for each date you want covered.")
   }
 
@@ -111,8 +116,14 @@ aggregate_sensor <- function(sensor_data, config, interval_length,
   config <- data.table::as.data.table(config)[detector_name == sensor_data$sensor[[1]]]
 
   # number of scans in the given interval length
+  # 3600 * 60 = 216,000 scans per hour
   interval_scans <- interval_length * 216000
   field_length <- as.numeric(config[, "detector_field"][[1]])
+  # convert field length from feet to miles
+  field_length_miles <- field_length %>%
+    units::as_units("feet") %>%
+    units::set_units("miles") %>%
+    as.numeric()
 
   if (replace_impossible == TRUE) {
     sensor_data <- tc.sensors::replace_impossible(
@@ -121,37 +132,41 @@ aggregate_sensor <- function(sensor_data, config, interval_length,
     )
 
     if (interpolate_missing == TRUE) {
-
       sensor_data <- sensor_data[
         , `:=`(volume.rollmean = data.table::shift(
           # calculate rolling average volume
           data.table::frollapply(volume, 3, mean,
-                                 align = "center",
-                                 na.rm = TRUE,
-                                 hasNA = TRUE
-          ))
-        ),
-        by = .(sensor)][
-          , volume := ifelse(is.na(volume), volume.rollmean, volume)][
-            # calculate rolling average occupancy
-            , `:=`(occupancy.rollmean = data.table::shift(
-              data.table::frollapply(occupancy, 3, mean,
-                                     align = "center",
-                                     na.rm = TRUE,
-                                     hasNA = TRUE
-              )
-            )),
-            by = .(sensor)][
-              , occupancy := ifelse(is.na(occupancy), occupancy.rollmean, occupancy)][
-                , .(volume, occupancy, date, sensor, hour, min)]
+            align = "center",
+            na.rm = TRUE,
+            hasNA = TRUE
+          )
+        )),
+        by = .(sensor)
+      ][
+        , volume := ifelse(is.na(volume), volume.rollmean, volume)
+      ][
+        # calculate rolling average occupancy
+        , `:=`(occupancy.rollmean = data.table::shift(
+          data.table::frollapply(occupancy, 3, mean,
+            align = "center",
+            na.rm = TRUE,
+            hasNA = TRUE
+          )
+        )),
+        by = .(sensor)
+      ][
+        , occupancy := ifelse(is.na(occupancy), occupancy.rollmean, occupancy)
+      ][
+        , .(volume, occupancy, date, sensor, hour, min)
+      ]
     }
   }
 
   # find interval length in minutes
   interval_length_min <- interval_length * 60
   n_rows_expected <- interval_length_min * 2 # two scans/observations per minute
 
-  # there are 60 scans/second
+  # there are 60 scans/second in the raw data
   # 60 * 30 = 1,800 scans/ 30 sec (the interval we are given)
   # 60 * 60 = 3,600 scans/minute
   # 3600 * 60 = 216,000 scans per hour
@@ -165,10 +180,11 @@ aggregate_sensor <- function(sensor_data, config, interval_length,
 
     # add minute bins to sensor_data
     sensor_data[
-      , interval_min_bin := findInterval(sensor_data$min, bins)][
-        , start_min := min(min),
-        by = .(date, hour, interval_min_bin)
-      ]
+      , interval_min_bin := findInterval(sensor_data$min, bins)
+    ][
+      , start_min := min(min),
+      by = .(date, hour, interval_min_bin)
+    ]
 
     sensor_data_agg <- sensor_data[
       , as.list(unlist(lapply(.SD, function(x) {
@@ -182,33 +198,38 @@ aggregate_sensor <- function(sensor_data, config, interval_length,
         )
       }))),
       by = .(date, hour, start_min, interval_min_bin, sensor),
-      .SDcols = c("volume", "occupancy")][
-        , start_datetime := as.character(as.POSIXct(
-          paste(date, hour, start_min),
-          format = "%Y-%m-%d %H %M"))][
-            , occupancy.pct := (occupancy.sum / interval_scans)][
-              # if volume is not 0 and occupancy.pct is gte occupancy threshold
-              , speed := ifelse(volume.sum != 0 & occupancy.pct >= occupancy_pct_threshold,
-                                # speed is volume * interval length in hours * field length
-                                # divided by occupancy * feet to miles conversion
-                                (volume.sum * (60 / interval_length_min) * field_length) / (5280 * occupancy.pct),
-                                # otherwise, NA
-                                NA
-              )
-            ]
-
+      .SDcols = c("volume", "occupancy")
+    ][
+      , start_datetime := as.character(as.POSIXct(
+        paste(date, hour, start_min),
+        format = "%Y-%m-%d %H %M"
+      ))
+    ][
+      , occupancy.pct := (occupancy.sum / interval_scans)
+    ][
+      , speed := ifelse(
+        # if volume is not 0 and occupancy.pct is gte occupancy threshold
+        volume.sum != 0 & occupancy.pct >= occupancy_pct_threshold,
+        # speed is volume * number of interval periods in a single hour * field length in miles
+        # divided by occupancy
+        (volume.sum * (60 / interval_length_min) * field_length_miles) / occupancy.pct,
+        # otherwise, NA
+        NA
+      )
+    ]
   } else {
     # if the interval length is greater than or equal to 1 hour
-
     # create hour bins
     bins <- seq(0, 24, interval_length)
 
     # add hour bins to sensor_data
     sensor_data[
-      , date := data.table::as.IDate(date)][
-        , year := data.table::year(date)][
-          , interval_bin := findInterval(sensor_data$hour, bins)
-        ]
+      , date := data.table::as.IDate(date)
+    ][
+      , year := data.table::year(date)
+    ][
+      , interval_bin := findInterval(sensor_data$hour, bins)
+    ]
 
     # order by date
     data.table::setorder(sensor_data, date)
@@ -225,20 +246,27 @@ aggregate_sensor <- function(sensor_data, config, interval_length,
         )
       }))),
       by = .(date, interval_bin, sensor),
-      .SDcols = c("volume", "occupancy")][
-        # if total occupancy is gte the number of interval scans, make NA
-        , occupancy.sum := ifelse(occupancy.sum >= interval_scans, NA, occupancy.sum)][
-          # calculate occupancy percent of all interval scans
-          , occupancy.pct := (occupancy.sum / interval_scans)][
-            # if total volume is not 0 and occupancy is occupancy.pct is gte occupancy threshold
-            , speed := ifelse(volume.sum != 0 & occupancy.pct >= occupancy_pct_threshold,
-                              # calculate speed
-                              ((volume.sum * field_length) /
-                                 (5280 * occupancy.pct)) / interval_length,
-                              # otherwise, speed is NA
-                              NA
-            )
-          ]
+      .SDcols = c("volume", "occupancy")
+    ][
+      # if total occupancy is gte the number of interval scans, make NA
+      , occupancy.sum := ifelse(occupancy.sum >= interval_scans, NA, occupancy.sum)
+    ][
+      # calculate occupancy percent of all interval scans
+      , occupancy.pct := (occupancy.sum / interval_scans)
+    ][
+      , speed := ifelse(
+        # if volume is not 0 and occupancy percent meets threshold value,
+        volume.sum != 0 & occupancy.pct >= occupancy_pct_threshold,
+        # calculate speed
+        # volume * field length in miles
+        # divided by occupancy
+        # all over the interval length in hours
+        ((volume.sum * field_length_miles) /
+          (occupancy.pct)) / interval_length,
+        # otherwise, speed is NA
+        NA
+      )
+    ]
   }
 
   return(sensor_data_agg)

diff --git a/man/aggregate_sensor.Rd b/man/aggregate_sensor.Rd
diff --git a/man/tc.sensors-package.Rd b/man/tc.sensors-package.Rd
diff --git a/vignettes/tc.sensors.Rmd b/vignettes/tc.sensors.Rmd
@@ -83,6 +83,8 @@ $$\frac{Flow}{Density} $$
 
 The average flow that a roadway is losing, either due to low traffic or high congestion, throughout the sampling period. Capacity is calculated using Flow and Density.   
 
+If flow exceeds 1,800 vehicles per hour, the sensor is considered well performing. If flow is under 1,800 vehicles per hour and vehicle density is greater than 43 vehicles per mile, the sensor is at lost capacity; there is more traffic than the road can accommodate at free-flow speed. If the flow is under 1,800 vehicles per hour and vehicle density is less than 43 vehicles per mile, the sensor has spare capacity; there is less traffic than the road is built to handle.
+
 - Flow > 1800: 0, roadway operating at appropriate capacity
 - Density > 43: Lost Capacity: Flow - 1800
 - Density ≤ 43: Spare Capacity: 1800 - Flow