From 3d95e3604e77b18777a4900214b44e91d1c3e49f Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:26:22 +0000 Subject: [PATCH 01/19] add dummy arrival time because gtfs-segments updated --- rt_segment_speeds/scripts/cut_stop_segments.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rt_segment_speeds/scripts/cut_stop_segments.py b/rt_segment_speeds/scripts/cut_stop_segments.py index a5ef2d2f6..8a5a94475 100644 --- a/rt_segment_speeds/scripts/cut_stop_segments.py +++ b/rt_segment_speeds/scripts/cut_stop_segments.py @@ -64,6 +64,12 @@ def stop_times_with_shape( subset="geometry" ).reset_index(drop=True).set_geometry("geometry") + # Add a dummy arrival_time that is needed in gtfs_segments that is not NaT + # or else it'll throw error in gtfs_segments.create_segments. Use zero instead. + df = df.assign( + arrival_time = 0 + ) + return df From a8796d38b0179316648b13e5adb41a01806101bf Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 23:49:45 +0000 Subject: [PATCH 02/19] (refactor): nearest 5 vp and filter to 2 --- .../scripts/new_nearest_vp_and_filter.py | 133 +++++++++++++ .../segment_speed_utils/neighbor.py | 174 +++++++++++++++++- 2 files changed, 305 insertions(+), 2 deletions(-) create mode 100644 rt_segment_speeds/scripts/new_nearest_vp_and_filter.py diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py new file mode 100644 index 000000000..004f7e29d --- /dev/null +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -0,0 +1,133 @@ +import dask_geopandas as dg +import dask.dataframe as dd +import datetime +import geopandas as gpd +import numpy as np +import pandas as pd +import shapely +import sys + +from loguru import logger + +from shared_utils import rt_dates +from segment_speed_utils import helpers, neighbor +from update_vars import SEGMENT_GCS, SHARED_GCS, GTFS_DATA_DICT +from segment_speed_utils.project_vars import PROJECT_CRS + + +def 
stop_times_for_shape_segments( + analysis_date: str, + dict_inputs: dict +) -> gpd.GeoDataFrame: + """ + This is the stop times table using only 1 shape for each + route-direction. Every trip belong to that shape + will be cut along the same stops. + This allows us to aggregate segments across trips because each + segment has the same stop_id1 and stop_id2. + """ + SEGMENT_FILE = dict_inputs["segments_file"] + + rt_trips = helpers.import_unique_vp_trips(analysis_date) + + shape_stop_combinations = pd.read_parquet( + f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet", + columns = ["trip_instance_key", + "stop_id1", "stop_pair", + "st_trip_instance_key"], + filters = [[ + #("schedule_gtfs_dataset_key", "==", "7cc0cb1871dfd558f11a2885c145d144"), + ("trip_instance_key", "in", rt_trips) + ]] + ).rename(columns = {"stop_id1": "stop_id"}) + + subset_trips = shape_stop_combinations.st_trip_instance_key.unique() + + stops_to_use = helpers.import_scheduled_stop_times( + analysis_date, + columns = ["trip_instance_key", "shape_array_key", + "stop_sequence", "stop_id", "stop_pair", + "stop_primary_direction", "geometry"], + filters = [[("trip_instance_key", "in", subset_trips)]], + get_pandas = True, + with_direction = True + ).rename(columns = {"trip_instance_key": "st_trip_instance_key"}) + + stop_times = pd.merge( + stops_to_use, + shape_stop_combinations, + on = ["st_trip_instance_key", "stop_id", "stop_pair"], + how = "inner" + ).drop( + columns = "st_trip_instance_key" + ).drop_duplicates().reset_index(drop=True) + + return stop_times + + +def new_nearest_neighbor_for_stop( + analysis_date: str, + segment_type = segment_type, + config_path = GTFS_DATA_DICT +): + """ + """ + start = datetime.datetime.now() + + dict_inputs = config_path[segment_type] + trip_stop_cols = [*dict_inputs["trip_stop_cols"]] + EXPORT_FILE = dict_inputs["stage2c"] + + stop_times = stop_times_for_shape_segments( + analysis_date, + dict_inputs + ) + + gdf = 
neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) + + vp_before, vp_after = np.vectorize( + neighbor.new_subset_arrays_to_valid_directions + )( + gdf.vp_primary_direction, + gdf.vp_geometry, + gdf.vp_idx, + gdf.stop_geometry, + gdf.stop_primary_direction, + gdf.shape_geometry, + gdf.stop_meters + ) + + gdf2 = gdf.assign( + before_vp_idx = vp_before, + after_vp_idx = vp_after, + )[trip_stop_cols + [ + "shape_array_key", "stop_meters", "before_vp_idx", "after_vp_idx"] + ] + + del gdf, stop_times + + gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet") + + end = datetime.datetime.now() + logger.info(f"nearest neighbor for {segment_type} " + f"{analysis_date}: {end - start}") + + return + + +''' +if __name__ == "__main__": + + from segment_speed_utils.project_vars import analysis_date_list + from dask import delayed, compute + + delayed_dfs = [ + delayed(new_nearest_neighbor_for_stop)( + analysis_date = analysis_date, + segment_type = segment_type, + config_path = GTFS_DATA_DICT + ) for analysis_date in analysis_date_list + ] + + [compute(i)[0] for i in delayed_dfs] +''' \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index ffa197f93..065da1330 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -8,7 +8,7 @@ from calitp_data_analysis.geography_utils import WGS84 from segment_speed_utils import gtfs_schedule_wrangling, vp_transform -from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT +from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, PROJECT_CRS from shared_utils import geo_utils @@ -56,6 +56,50 @@ def merge_stop_vp_for_nearest_neighbor( return gdf +def new_merge_stop_vp_for_nearest_neighbor( + stop_times: gpd.GeoDataFrame, + analysis_date: str, + **kwargs +): + VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line + + 
vp_condensed = gpd.read_parquet( + f"{SEGMENT_GCS}{VP_NN}_{analysis_date}.parquet", + columns = ["trip_instance_key", + "vp_idx", "vp_primary_direction", + "geometry"], + **kwargs + ).to_crs(WGS84) + + shapes = helpers.import_scheduled_shapes( + analysis_date, + columns = ["shape_array_key", "geometry"], + crs = PROJECT_CRS, + get_pandas = True, + filters = [[("shape_array_key", "in", stop_times.shape_array_key.tolist())]] + ).rename(columns = {"geometry": "shape_geometry"}) + + gdf = pd.merge( + stop_times.rename( + columns = {"geometry": "stop_geometry"} + ).set_geometry("stop_geometry").to_crs(PROJECT_CRS), + vp_condensed.to_crs(PROJECT_CRS).rename( + columns = {"geometry": "vp_geometry"}), + on = "trip_instance_key", + how = "inner" + ).merge( + shapes, + on = "shape_array_key", + how = "inner" + ) + + gdf = gdf.assign( + stop_meters = gdf.shape_geometry.project(gdf.stop_geometry) + ) + + return gdf + + def subset_arrays_to_valid_directions( vp_direction_array: np.ndarray, vp_geometry: shapely.LineString, @@ -126,4 +170,130 @@ def add_nearest_neighbor_result_array( nearest_vp_arr = nearest_vp_arr_series ).drop(columns = ["vp_primary_direction", "vp_idx", "vp_geometry"]) - return gdf2 \ No newline at end of file + return gdf2 + + + +def find_nearest_points( + vp_coords_line: np.ndarray, + target_stop: shapely.Point, + vp_idx_array: np.ndarray, +) -> np.ndarray: + """ + vp_coords_line is all the vehicle positions strung together as + coordinates in a linestring. + The target point is a stop. + + We want to find the k nearest points before/after a stop. + Start with k=5. + Returns an array that gives the indices that are the nearest k points + (ex: nearest 5 vp to each stop). 
+ """ + indices = geo_utils.nearest_snap( + vp_coords_line, + target_stop, + k_neighbors = 5 + ) + + # nearest neighbor returns self.N + # if there are no nearest neighbor results found + # if we want 10 nearest neighbors and 8th, 9th, 10th are all + # the same result, the 8th will have a result, then 9th and 10th will + # return the length of the array (which is out-of-bounds) + indices2 = indices[indices < vp_idx_array.size] + + return indices2 + + +def filter_to_nearest2_vp( + vp_coords_line: np.ndarray, + shape_geometry: shapely.LineString, + vp_idx_array: np.ndarray, + stop_meters: float, + indices_of_nearest: np.ndarray, +) -> tuple[np.ndarray]: + """ + Take the indices that are the nearest. + Filter the vp coords down and project those against the shape_geometry. + Calculate how close those nearest k vp are to a stop (as they travel along a shape). + + Filter down to the nearest 2 vp before and after a stop. + If there isn't one before or after, a value of -1 is returned. + """ + # Subset the array of vp coords and vp_idx_array with + # the indices that show the nearest k neighbors. 
+ nearest_vp = vp_coords_line[indices_of_nearest] + nearest_vp_idx = vp_idx_array[indices_of_nearest] + + # Project these vp coords to shape geometry and see how far it is + # from the stop's position on the shape + nearest_vp_projected = np.asarray( + [shape_geometry.project(shapely.Point(i)) - stop_meters + for i in nearest_vp] + ) + + # Negative values are before the stop + # Positive values are vp after the stop + before_indices = np.where(nearest_vp_projected < 0)[0] + after_indices = np.where(nearest_vp_projected > 0)[0] + + # Grab the closest vp before a stop (-1 means array was empty) + if before_indices.size > 0: + before = nearest_vp_idx[before_indices][-1] + else: + before = -1 + + # Grab the closest vp after a stop (-1 means array was empty) + if after_indices.size > 0: + after = nearest_vp_idx[after_indices][0] + else: + after = -1 + + return before, after + + +def new_subset_arrays_to_valid_directions( + vp_direction_array: np.ndarray, + vp_geometry: shapely.LineString, + vp_idx_array: np.ndarray, + stop_geometry: shapely.Point, + stop_direction: str, + shape_geometry: shapely.LineString, + stop_meters: float +) -> np.ndarray: + """ + Each row stores several arrays related to vp. + vp_direction is an array, vp_idx is an array, + and the linestring of vp coords can be coerced into an array. + + When we're doing nearest neighbor search, we want to + first filter the full array down to valid vp + before snapping it. 
+ """ + opposite_direction = vp_transform.OPPOSITE_DIRECTIONS[stop_direction] + + # These are the valid index values where opposite direction + # is excluded + valid_indices = (vp_direction_array != opposite_direction).nonzero() + + # These are vp coords where index values of opposite direction is excluded + valid_vp_coords_line = np.array(vp_geometry.coords)[valid_indices] + + # These are the subset of vp_idx values where opposite direction is excluded + valid_vp_idx_arr = np.asarray(vp_idx_array)[valid_indices] + + nearest_indices = find_nearest_points( + valid_vp_coords_line, + stop_geometry, + valid_vp_idx_arr, + ) + + before_vp, after_vp = filter_to_nearest2_vp( + valid_vp_coords_line, + shape_geometry, + valid_vp_idx_arr, + stop_meters, + nearest_indices, + ) + + return before_vp, after_vp \ No newline at end of file From 533c7392017d5ce061f5ab4407d7d11165633bb6 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 23:50:28 +0000 Subject: [PATCH 03/19] could stage2c replace stage2/stage2b outputs? 
--- _shared_utils/shared_utils/gtfs_analytics_data.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index 5391c75b8..96ffcb1b7 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -74,6 +74,7 @@ stop_segments: stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_shape_segments" stage2b: "nearest/nearest2_vp_shape_segments" + stage2c: "nearest/wide_nearest2_vp_shape_segments" stage3: "stop_arrivals" stage4: "speeds_stop_segments" trip_stop_cols: ["trip_instance_key", "stop_sequence"] From bc84b9ff8a5142c51235085186c5fb33ec3bca5c Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Sat, 14 Dec 2024 01:05:13 +0000 Subject: [PATCH 04/19] (refactor): add vp shape meters for nearest neighbor results --- .../scripts/new_nearest_vp_and_filter.py | 27 +++++++++++------ .../segment_speed_utils/neighbor.py | 30 +++++++++++-------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py index 004f7e29d..63c299718 100644 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -1,5 +1,3 @@ -import dask_geopandas as dg -import dask.dataframe as dd import datetime import geopandas as gpd import numpy as np @@ -36,7 +34,6 @@ def stop_times_for_shape_segments( "stop_id1", "stop_pair", "st_trip_instance_key"], filters = [[ - #("schedule_gtfs_dataset_key", "==", "7cc0cb1871dfd558f11a2885c145d144"), ("trip_instance_key", "in", rt_trips) ]] ).rename(columns = {"stop_id1": "stop_id"}) @@ -67,7 +64,7 @@ def stop_times_for_shape_segments( def new_nearest_neighbor_for_stop( analysis_date: str, - segment_type = segment_type, + segment_type: str, config_path = GTFS_DATA_DICT ): """ @@ -85,7 +82,7 @@ def 
new_nearest_neighbor_for_stop( gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - vp_before, vp_after = np.vectorize( + vp_before, vp_after, vp_before_m, vp_after_m = np.vectorize( neighbor.new_subset_arrays_to_valid_directions )( gdf.vp_primary_direction, @@ -100,8 +97,12 @@ def new_nearest_neighbor_for_stop( gdf2 = gdf.assign( before_vp_idx = vp_before, after_vp_idx = vp_after, + before_vp_meters = vp_before_m, + after_vp_meters = vp_after_m )[trip_stop_cols + [ - "shape_array_key", "stop_meters", "before_vp_idx", "after_vp_idx"] + "shape_array_key", "stop_meters", + "before_vp_idx", "after_vp_idx", + "before_vp_meters", "after_vp_meters"] ] del gdf, stop_times @@ -115,11 +116,20 @@ def new_nearest_neighbor_for_stop( return -''' + if __name__ == "__main__": - from segment_speed_utils.project_vars import analysis_date_list + #from segment_speed_utils.project_vars import analysis_date_list + from dask import delayed, compute + LOG_FILE = "../logs/test.log" + logger.add(LOG_FILE, retention="3 months") + logger.add(sys.stderr, + format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", + level="INFO") + + analysis_date_list = [rt_dates.DATES["sep2024"]] + segment_type = "stop_segments" delayed_dfs = [ delayed(new_nearest_neighbor_for_stop)( @@ -130,4 +140,3 @@ def new_nearest_neighbor_for_stop( ] [compute(i)[0] for i in delayed_dfs] -''' \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index 065da1330..f9c19ea2c 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -7,7 +7,7 @@ import shapely from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import gtfs_schedule_wrangling, vp_transform +from segment_speed_utils import gtfs_schedule_wrangling, helpers, vp_transform from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, 
PROJECT_CRS from shared_utils import geo_utils @@ -69,7 +69,7 @@ def new_merge_stop_vp_for_nearest_neighbor( "vp_idx", "vp_primary_direction", "geometry"], **kwargs - ).to_crs(WGS84) + ) shapes = helpers.import_scheduled_shapes( analysis_date, @@ -228,28 +228,32 @@ def filter_to_nearest2_vp( # Project these vp coords to shape geometry and see how far it is # from the stop's position on the shape nearest_vp_projected = np.asarray( - [shape_geometry.project(shapely.Point(i)) - stop_meters + [shape_geometry.project(shapely.Point(i)) for i in nearest_vp] ) # Negative values are before the stop # Positive values are vp after the stop - before_indices = np.where(nearest_vp_projected < 0)[0] - after_indices = np.where(nearest_vp_projected > 0)[0] + before_indices = np.where(nearest_vp_projected - stop_meters < 0)[0] + after_indices = np.where(nearest_vp_projected - stop_meters > 0)[0] # Grab the closest vp before a stop (-1 means array was empty) if before_indices.size > 0: - before = nearest_vp_idx[before_indices][-1] + before_idx = nearest_vp_idx[before_indices][-1] + before_vp_meters = nearest_vp_projected[before_indices][-1] else: - before = -1 + before_idx = -1 + before_vp_meters = 0 # Grab the closest vp after a stop (-1 means array was empty) if after_indices.size > 0: - after = nearest_vp_idx[after_indices][0] + after_idx = nearest_vp_idx[after_indices][0] + after_vp_meters = nearest_vp_projected[after_indices][0] else: - after = -1 + after_idx = -1 + after_vp_meters = 0 - return before, after + return before_idx, after_idx, before_vp_meters, after_vp_meters def new_subset_arrays_to_valid_directions( @@ -287,8 +291,8 @@ def new_subset_arrays_to_valid_directions( stop_geometry, valid_vp_idx_arr, ) - - before_vp, after_vp = filter_to_nearest2_vp( + + before_vp, after_vp, before_meters, after_meters = filter_to_nearest2_vp( valid_vp_coords_line, shape_geometry, valid_vp_idx_arr, @@ -296,4 +300,4 @@ def new_subset_arrays_to_valid_directions( nearest_indices, ) - 
return before_vp, after_vp \ No newline at end of file + return before_vp, after_vp, before_meters, after_meters \ No newline at end of file From 4a26425adffacd05f53dfc1af07b222eee139b67 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Tue, 17 Dec 2024 18:53:44 +0000 Subject: [PATCH 05/19] rename columns in nearest vp output --- .../scripts/new_nearest_vp_and_filter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py index 63c299718..9a9b21fff 100644 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -82,7 +82,7 @@ def new_nearest_neighbor_for_stop( gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - vp_before, vp_after, vp_before_m, vp_after_m = np.vectorize( + vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( neighbor.new_subset_arrays_to_valid_directions )( gdf.vp_primary_direction, @@ -95,14 +95,14 @@ def new_nearest_neighbor_for_stop( ) gdf2 = gdf.assign( - before_vp_idx = vp_before, - after_vp_idx = vp_after, - before_vp_meters = vp_before_m, - after_vp_meters = vp_after_m + prior_vp_idx = vp_before, + subseq_vp_idx = vp_after, + prior_vp_meters = vp_before_meters, + subseq_vp_meters = vp_after_meters )[trip_stop_cols + [ "shape_array_key", "stop_meters", - "before_vp_idx", "after_vp_idx", - "before_vp_meters", "after_vp_meters"] + "prior_vp_idx", "subseq_vp_idx", + "prior_vp_meters", "subseq_vp_meters"] ] del gdf, stop_times From 94496c2296f61a7f33ecb0ee88ce4fd182376f34 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Tue, 17 Dec 2024 18:54:24 +0000 Subject: [PATCH 06/19] (refactor): stop arrival interpolation is better set up now, remove intermediate steps --- .../scripts/interpolate_stop_arrival.py | 101 ++++++------------ 1 file changed, 30 insertions(+), 71 deletions(-) diff --git 
a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index a155b74c1..c8b5a50c5 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -1,4 +1,10 @@ """ +Interpolate stop arrival +based on where the nearest 2 vp are +when stop position is between the 2 vp. + +Stop and vp geometries should be projected along the shape geometry, +Use `stop_meters`, `prior_vp_meters`, `subseq_vp_meters`. """ import datetime import geopandas as gpd @@ -33,62 +39,6 @@ def get_vp_timestamps( return vp -def consolidate_surrounding_vp( - df: pd.DataFrame, - group_cols: list, -) -> pd.DataFrame: - """ - This reshapes the df to wide so that each stop position has - a prior and subseq timestamp (now called vp_timestamp_local). - """ - df = df.assign( - obs = (df.sort_values(group_cols + ["vp_idx"]) - .groupby(group_cols, - observed=True, group_keys=False, dropna=False) - .cumcount() - ) - ) - - group_cols2 = group_cols + ["stop_meters"] - prefix_cols = ["vp_idx", "shape_meters"] - timestamp_cols = ["location_timestamp_local", "moving_timestamp_local"] - # since shape_meters actually might be decreasing as time progresses, - # (bus moving back towards origin of shape) - # we don't actually know that the smaller shape_meters is the first timestamp - # nor the larger shape_meters is the second timestamp. - # all we know is that stop_meters (stop) falls between these 2 shape_meters. 
- # sort by timestamp, and set the order to be 0, 1 - vp_before_stop = df.loc[df.obs==0][group_cols2 + prefix_cols + timestamp_cols] - vp_after_stop = df.loc[df.obs==1][group_cols2 + prefix_cols + timestamp_cols] - - # For the vp before the stop occurs, we want the maximum timestamp - # of the last position - # We want to keep the moving_timestamp (which is after it's dwelled) - vp_before_stop = vp_before_stop.assign( - prior_vp_timestamp_local = vp_before_stop.moving_timestamp_local, - ).rename( - columns = {**{i: f"prior_{i}" for i in prefix_cols}} - ).drop(columns = timestamp_cols) - - # For the vp after the stop occurs, we want the minimum timestamp - # of that next position - # Keep location_timetamp (before it dwells) - vp_after_stop = vp_after_stop.assign( - subseq_vp_timestamp_local = vp_after_stop.location_timestamp_local, - ).rename( - columns = {**{i: f"subseq_{i}" for i in prefix_cols}} - ).drop(columns = timestamp_cols) - - df_wide = pd.merge( - vp_before_stop, - vp_after_stop, - on = group_cols2, - how = "inner" - ) - - return df_wide - - def add_arrival_time( nearest_vp_input_file: str, vp_timestamp_file: str, @@ -106,7 +56,11 @@ def add_arrival_time( f"{SEGMENT_GCS}{nearest_vp_input_file}_{analysis_date}.parquet" ) - subset_vp = vp_filtered.vp_idx.unique() + subset_vp = np.unique( + np.concatenate( + (vp_filtered.prior_vp_idx.unique(), + vp_filtered.subseq_vp_idx.unique()) + )).tolist() vp_timestamps = get_vp_timestamps( vp_timestamp_file, @@ -116,10 +70,14 @@ def add_arrival_time( df = pd.merge( vp_filtered, - vp_timestamps, - on = "vp_idx", + vp_timestamps.add_prefix("prior_"), + on = "prior_vp_idx", + how = "inner" + ).merge( + vp_timestamps.add_prefix("subseq_"), + on = "subseq_vp_idx", how = "inner" - ).pipe(consolidate_surrounding_vp, group_cols) + ) arrival_time_series = [] @@ -128,13 +86,13 @@ def add_arrival_time( stop_position = getattr(row, "stop_meters") projected_points = np.asarray([ - getattr(row, "prior_shape_meters"), - getattr(row, 
"subseq_shape_meters") + getattr(row, "prior_vp_meters"), + getattr(row, "subseq_vp_meters") ]) timestamp_arr = np.asarray([ - getattr(row, "prior_vp_timestamp_local"), - getattr(row, "subseq_vp_timestamp_local"), + getattr(row, "prior_moving_timestamp_local"), + getattr(row, "subseq_location_timestamp_local"), ]) @@ -215,12 +173,13 @@ def enforce_monotonicity_and_interpolate_across_stops( ) # Subset to trips that have at least 1 obs that violates monotonicity - trips_with_one_false = (df.groupby("trip_instance_key") - .agg({"arrival_time_sec_monotonic": "min"}) - .reset_index() - .query('arrival_time_sec_monotonic==0') - .trip_instance_key - ) + trips_with_one_false = ( + df.groupby("trip_instance_key") + .agg({"arrival_time_sec_monotonic": "min"}) + .reset_index() + .query('arrival_time_sec_monotonic==0') + .trip_instance_key + ) # Set arrival times to NaT if it's not monotonically increasing mask = df.arrival_time_sec_monotonic == False @@ -254,7 +213,7 @@ def interpolate_stop_arrivals( dict_inputs = config_path[segment_type] trip_stop_cols = [*dict_inputs["trip_stop_cols"]] USABLE_VP_FILE = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage2b"] + INPUT_FILE = dict_inputs["stage2c"] STOP_ARRIVALS_FILE = dict_inputs["stage3"] start = datetime.datetime.now() From 06770ca6f3781ab669e0a7c2d9a9778ae28f6d2d Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:09:11 +0000 Subject: [PATCH 07/19] (refactor): refactored changes put into scripts --- .../scripts/interpolate_stop_arrival.py | 2 +- .../scripts/nearest_vp_to_stop.py | 50 ++++--- .../scripts/pipeline_rt_stop_times.py | 17 +-- .../scripts/pipeline_segment_speeds.py | 14 -- .../scripts/pipeline_speedmap.py | 12 -- .../segment_speed_utils/neighbor.py | 131 ++---------------- rt_segment_speeds/setup.py | 2 +- 7 files changed, 42 insertions(+), 186 deletions(-) diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index 
c8b5a50c5..4031a0c87 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -213,7 +213,7 @@ def interpolate_stop_arrivals( dict_inputs = config_path[segment_type] trip_stop_cols = [*dict_inputs["trip_stop_cols"]] USABLE_VP_FILE = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage2c"] + INPUT_FILE = dict_inputs["stage2"] STOP_ARRIVALS_FILE = dict_inputs["stage3"] start = datetime.datetime.now() diff --git a/rt_segment_speeds/scripts/nearest_vp_to_stop.py b/rt_segment_speeds/scripts/nearest_vp_to_stop.py index 25e59f742..2ea13aae0 100644 --- a/rt_segment_speeds/scripts/nearest_vp_to_stop.py +++ b/rt_segment_speeds/scripts/nearest_vp_to_stop.py @@ -12,7 +12,6 @@ from typing import Literal, Optional from calitp_data_analysis.geography_utils import WGS84 -from calitp_data_analysis import utils from segment_speed_utils import helpers, neighbor from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from segment_speed_utils.project_vars import SEGMENT_TYPES @@ -155,31 +154,40 @@ def nearest_neighbor_for_stop( else: print(f"{segment_type} is not valid") - gdf = neighbor.merge_stop_vp_for_nearest_neighbor( - stop_times, analysis_date) - - results = neighbor.add_nearest_neighbor_result_array(gdf, analysis_date) - - # Keep columns from results that are consistent across segment types - # use trip_stop_cols as a way to uniquely key into a row - keep_cols = trip_stop_cols + [ - "shape_array_key", - "stop_geometry", - "nearest_vp_arr" - ] - utils.geoparquet_gcs_export( - results[keep_cols], - SEGMENT_GCS, - EXPORT_FILE, + gdf = neighbor.merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) + + vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( + neighbor.subset_arrays_to_valid_directions + )( + gdf.vp_primary_direction, + gdf.vp_geometry, + gdf.vp_idx, + gdf.stop_geometry, + gdf.stop_primary_direction, + gdf.shape_geometry, + gdf.stop_meters ) + + gdf2 = gdf.assign( + 
prior_vp_idx = vp_before, + subseq_vp_idx = vp_after, + prior_vp_meters = vp_before_meters, + subseq_vp_meters = vp_after_meters + )[trip_stop_cols + [ + "shape_array_key", "stop_meters", + "prior_vp_idx", "subseq_vp_idx", + "prior_vp_meters", "subseq_vp_meters"] + ] + + del gdf, stop_times + + gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}.parquet") end = datetime.datetime.now() logger.info(f"nearest neighbor for {segment_type} " - f"{analysis_date}: {end - start}") - - del gdf, stop_times, results - + f"{analysis_date}: {end - start}") + return ''' diff --git a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py index d428933a3..a37199c96 100644 --- a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py +++ b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py @@ -9,7 +9,6 @@ from loguru import logger from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals from update_vars import GTFS_DATA_DICT @@ -36,21 +35,7 @@ ] [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - + logger.remove() diff --git a/rt_segment_speeds/scripts/pipeline_segment_speeds.py b/rt_segment_speeds/scripts/pipeline_segment_speeds.py index df3862fcd..fe8084eba 100644 --- a/rt_segment_speeds/scripts/pipeline_segment_speeds.py +++ b/rt_segment_speeds/scripts/pipeline_segment_speeds.py @@ -9,7 +9,6 @@ from loguru import logger from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals 
from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals from update_vars import GTFS_DATA_DICT @@ -40,19 +39,6 @@ del delayed_dfs - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - logger.remove() LOG_FILE = "../logs/interpolate_stop_arrival.log" diff --git a/rt_segment_speeds/scripts/pipeline_speedmap.py b/rt_segment_speeds/scripts/pipeline_speedmap.py index 545047217..293db2e44 100644 --- a/rt_segment_speeds/scripts/pipeline_speedmap.py +++ b/rt_segment_speeds/scripts/pipeline_speedmap.py @@ -14,7 +14,6 @@ from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals @@ -88,17 +87,6 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( ] [compute(i)[0] for i in delayed_dfs] - - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] logger.remove() diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index f9c19ea2c..f5d21ce84 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -6,61 +6,21 @@ import pandas as pd import shapely -from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import gtfs_schedule_wrangling, helpers, vp_transform +from segment_speed_utils import helpers, vp_transform from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, PROJECT_CRS 
from shared_utils import geo_utils -def add_nearest_vp_idx( - vp_linestring: shapely.LineString, - stop: shapely.Point, - vp_idx_arr: np.ndarray -) -> int: - """ - Index into where the nearest vp is to the stop, - and return that vp_idx value from the vp_idx array. - """ - idx = geo_utils.nearest_snap(vp_linestring, stop, k_neighbors=1) - - return vp_idx_arr[idx] - - def merge_stop_vp_for_nearest_neighbor( stop_times: gpd.GeoDataFrame, analysis_date: str, **kwargs -) -> gpd.GeoDataFrame: - VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line - - vp_condensed = gpd.read_parquet( - f"{SEGMENT_GCS}{VP_NN}_{analysis_date}.parquet", - columns = ["trip_instance_key", - "vp_idx", "vp_primary_direction", - "geometry"], - **kwargs - ).to_crs(WGS84) - - gdf = pd.merge( - stop_times.rename( - columns = {"geometry": "stop_geometry"} - ).set_geometry("stop_geometry").to_crs(WGS84), - vp_condensed.rename( - columns = { - "geometry": "vp_geometry" - }), - on = "trip_instance_key", - how = "inner" - ) - - return gdf - - -def new_merge_stop_vp_for_nearest_neighbor( - stop_times: gpd.GeoDataFrame, - analysis_date: str, - **kwargs ): + """ + Merge stop times file with vp. + vp gdf has been condensed so that all the vp coords + make up coordinates of a linestring. 
+ """ VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line vp_condensed = gpd.read_parquet( @@ -93,6 +53,9 @@ def new_merge_stop_vp_for_nearest_neighbor( how = "inner" ) + # Calculate stop_meters, which is the stop geometry + # projected onto shape_geometry and is interpreted as + # stop X is Y meters along shape gdf = gdf.assign( stop_meters = gdf.shape_geometry.project(gdf.stop_geometry) ) @@ -100,80 +63,6 @@ def new_merge_stop_vp_for_nearest_neighbor( return gdf -def subset_arrays_to_valid_directions( - vp_direction_array: np.ndarray, - vp_geometry: shapely.LineString, - vp_idx_array: np.ndarray, - stop_geometry: shapely.Point, - stop_direction: str, -) -> np.ndarray: - """ - Each row stores several arrays related to vp. - vp_direction is an array, vp_idx is an array, - and the linestring of vp coords can be coerced into an array. - - When we're doing nearest neighbor search, we want to - first filter the full array down to valid vp - before snapping it. - """ - N_NEAREST_POINTS = 10 - - opposite_direction = vp_transform.OPPOSITE_DIRECTIONS[stop_direction] - - # These are the valid index values where opposite direction - # is excluded - valid_indices = (vp_direction_array != opposite_direction).nonzero() - - vp_coords_line = np.array(vp_geometry.coords)[valid_indices] - - vp_idx_arr = np.asarray(vp_idx_array)[valid_indices] - - np_inds = geo_utils.nearest_snap( - vp_coords_line, stop_geometry, N_NEAREST_POINTS - ) - - # nearest neighbor returns self.N - # if there are no nearest neighbor results found - # if we want 10 nearest neighbors and 8th, 9th, 10th are all - # the same result, the 8th will have a result, then 9th and 10th will - # return the length of the array (which is out-of-bounds) - np_inds2 = np_inds[np_inds < vp_idx_arr.size] - - nearest_vp_arr = vp_idx_arr[np_inds2] - - return nearest_vp_arr - - -def add_nearest_neighbor_result_array( - gdf: gpd.GeoDataFrame, - analysis_date: str, - **kwargs -) -> pd.DataFrame: - """ - Add the nearest 
k_neighbors result. - """ - nearest_vp_arr_series = [] - - for row in gdf.itertuples(): - - nearest_vp_arr = subset_arrays_to_valid_directions( - getattr(row, "vp_primary_direction"), - getattr(row, "vp_geometry"), - getattr(row, "vp_idx"), - getattr(row, "stop_geometry"), - getattr(row, "stop_primary_direction"), - ) - - nearest_vp_arr_series.append(nearest_vp_arr) - - gdf2 = gdf.assign( - nearest_vp_arr = nearest_vp_arr_series - ).drop(columns = ["vp_primary_direction", "vp_idx", "vp_geometry"]) - - return gdf2 - - - def find_nearest_points( vp_coords_line: np.ndarray, target_stop: shapely.Point, @@ -256,7 +145,7 @@ def filter_to_nearest2_vp( return before_idx, after_idx, before_vp_meters, after_vp_meters -def new_subset_arrays_to_valid_directions( +def subset_arrays_to_valid_directions( vp_direction_array: np.ndarray, vp_geometry: shapely.LineString, vp_idx_array: np.ndarray, diff --git a/rt_segment_speeds/setup.py b/rt_segment_speeds/setup.py index be6555fa4..ee07f9071 100644 --- a/rt_segment_speeds/setup.py +++ b/rt_segment_speeds/setup.py @@ -3,7 +3,7 @@ setup( name="segment_speed_utils", packages=find_packages(), - version="1.6", + version="1.7", description="Utility functions for GTFS RT segment speeds", author="Cal-ITP", license="Apache", From 493712eaec2c461ff508cd86ef93d05f03055cc7 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:09:56 +0000 Subject: [PATCH 08/19] (refactor): stage2, 2b now consolidated as stage2 --- _shared_utils/shared_utils/gtfs_analytics_data.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index 96ffcb1b7..712d31dc1 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -73,8 +73,6 @@ stop_segments: dir: ${gcs_paths.SEGMENT_GCS} stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_shape_segments" - stage2b: 
"nearest/nearest2_vp_shape_segments" - stage2c: "nearest/wide_nearest2_vp_shape_segments" stage3: "stop_arrivals" stage4: "speeds_stop_segments" trip_stop_cols: ["trip_instance_key", "stop_sequence"] @@ -82,7 +80,7 @@ stop_segments: stop_pair_cols: ["stop_pair", "stop_pair_name"] route_dir_cols: ["route_id", "direction_id"] segment_cols: ["route_id", "direction_id", "stop_pair", "geometry"] - shape_stop_single_segment: "rollup_singleday/speeds_shape_stop_segments" #-- stop after Oct 2024 + #shape_stop_single_segment: "rollup_singleday/speeds_shape_stop_segments" #-- stop after Oct 2024 route_dir_single_segment: "rollup_singleday/speeds_route_dir_segments" route_dir_single_segment_detail: "rollup_singleday/speeds_route_dir_segments_detail" # interim for speedmaps route_dir_multi_segment: "rollup_multiday/speeds_route_dir_segments" @@ -95,7 +93,6 @@ rt_stop_times: dir: ${gcs_paths.SEGMENT_GCS} stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_rt_stop_times" - stage2b: "nearest/nearest2_vp_rt_stop_times" stage3: "rt_stop_times/stop_arrivals" stage4: "rt_stop_times/speeds" trip_stop_cols: ["trip_instance_key", "stop_sequence"] @@ -116,7 +113,6 @@ speedmap_segments: stage1: ${speeds_tables.vp_dwell} proxy_stop_times: "stop_time_expansion/speedmap_stop_times" stage2: "nearest/nearest_vp_speedmap_proxy" - stage2b: "nearest/nearest2_vp_speedmap_proxy" stage3: "speedmap/stop_arrivals_proxy" stage3b: "speedmap/stop_arrivals" stage4: "speedmap/speeds" From 3f04352347083b69de86321d20988add36679507 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:11:35 +0000 Subject: [PATCH 09/19] (remove): testing script consolidating nearest neighbor intermediate steps --- .../scripts/interpolate_stop_arrival.py | 1 - .../scripts/new_nearest_vp_and_filter.py | 142 ------------------ 2 files changed, 143 deletions(-) delete mode 100644 rt_segment_speeds/scripts/new_nearest_vp_and_filter.py diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py 
b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index 4031a0c87..fafb920b8 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -95,7 +95,6 @@ def add_arrival_time( getattr(row, "subseq_location_timestamp_local"), ]) - interpolated_arrival = segment_calcs.interpolate_stop_arrival_time( stop_position, projected_points, timestamp_arr) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py deleted file mode 100644 index 9a9b21fff..000000000 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ /dev/null @@ -1,142 +0,0 @@ -import datetime -import geopandas as gpd -import numpy as np -import pandas as pd -import shapely -import sys - -from loguru import logger - -from shared_utils import rt_dates -from segment_speed_utils import helpers, neighbor -from update_vars import SEGMENT_GCS, SHARED_GCS, GTFS_DATA_DICT -from segment_speed_utils.project_vars import PROJECT_CRS - - -def stop_times_for_shape_segments( - analysis_date: str, - dict_inputs: dict -) -> gpd.GeoDataFrame: - """ - This is the stop times table using only 1 shape for each - route-direction. Every trip belong to that shape - will be cut along the same stops. - This allows us to aggregate segments across trips because each - segment has the same stop_id1 and stop_id2. 
- """ - SEGMENT_FILE = dict_inputs["segments_file"] - - rt_trips = helpers.import_unique_vp_trips(analysis_date) - - shape_stop_combinations = pd.read_parquet( - f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet", - columns = ["trip_instance_key", - "stop_id1", "stop_pair", - "st_trip_instance_key"], - filters = [[ - ("trip_instance_key", "in", rt_trips) - ]] - ).rename(columns = {"stop_id1": "stop_id"}) - - subset_trips = shape_stop_combinations.st_trip_instance_key.unique() - - stops_to_use = helpers.import_scheduled_stop_times( - analysis_date, - columns = ["trip_instance_key", "shape_array_key", - "stop_sequence", "stop_id", "stop_pair", - "stop_primary_direction", "geometry"], - filters = [[("trip_instance_key", "in", subset_trips)]], - get_pandas = True, - with_direction = True - ).rename(columns = {"trip_instance_key": "st_trip_instance_key"}) - - stop_times = pd.merge( - stops_to_use, - shape_stop_combinations, - on = ["st_trip_instance_key", "stop_id", "stop_pair"], - how = "inner" - ).drop( - columns = "st_trip_instance_key" - ).drop_duplicates().reset_index(drop=True) - - return stop_times - - -def new_nearest_neighbor_for_stop( - analysis_date: str, - segment_type: str, - config_path = GTFS_DATA_DICT -): - """ - """ - start = datetime.datetime.now() - - dict_inputs = config_path[segment_type] - trip_stop_cols = [*dict_inputs["trip_stop_cols"]] - EXPORT_FILE = dict_inputs["stage2c"] - - stop_times = stop_times_for_shape_segments( - analysis_date, - dict_inputs - ) - - gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - - vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( - neighbor.new_subset_arrays_to_valid_directions - )( - gdf.vp_primary_direction, - gdf.vp_geometry, - gdf.vp_idx, - gdf.stop_geometry, - gdf.stop_primary_direction, - gdf.shape_geometry, - gdf.stop_meters - ) - - gdf2 = gdf.assign( - prior_vp_idx = vp_before, - subseq_vp_idx = vp_after, - prior_vp_meters = vp_before_meters, - 
subseq_vp_meters = vp_after_meters - )[trip_stop_cols + [ - "shape_array_key", "stop_meters", - "prior_vp_idx", "subseq_vp_idx", - "prior_vp_meters", "subseq_vp_meters"] - ] - - del gdf, stop_times - - gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet") - - end = datetime.datetime.now() - logger.info(f"nearest neighbor for {segment_type} " - f"{analysis_date}: {end - start}") - - return - - - -if __name__ == "__main__": - - #from segment_speed_utils.project_vars import analysis_date_list - - from dask import delayed, compute - LOG_FILE = "../logs/test.log" - logger.add(LOG_FILE, retention="3 months") - logger.add(sys.stderr, - format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", - level="INFO") - - analysis_date_list = [rt_dates.DATES["sep2024"]] - segment_type = "stop_segments" - - delayed_dfs = [ - delayed(new_nearest_neighbor_for_stop)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] From 406035a47dffa825c559620bb6cba43569d4d43c Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 18:51:00 +0000 Subject: [PATCH 10/19] (open_data): update readme to include 2 speed layers --- open_data/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/open_data/README.md b/open_data/README.md index ecb672b5d..48e73d5f6 100644 --- a/open_data/README.md +++ b/open_data/README.md @@ -5,6 +5,8 @@ 1. [HQTA Stops](https://gis.data.ca.gov/datasets/f6c30480f0e84be699383192c099a6a4_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_HQ_Transit_Stops/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_HQ_Transit_Stops/MapServer) 1. 
[CA Transit Routes](https://gis.data.ca.gov/datasets/dd7cb74665a14859a59b8c31d3bc5a3e_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Routes/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Routes/MapServer) 1. [CA Transit Stops](https://gis.data.ca.gov/datasets/900992cc94ab49dbbb906d8f147c2a72_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Stops/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Stops/MapServer) +1. [CA Average Transit Speeds by Stop-to-Stop Segments](https://gis.data.ca.gov/datasets/4937eeb59fdb4e56ae75e64688c7f2c0_0/): metadata [feature server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Stop_Segments/FeatureServer/0) or [map server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Stop_Segments/MapServer/0) +1. [CA Average Transit Speeds by Route and Time of Day](https://gis.data.ca.gov/datasets/071df783099f4224b7ebb54839eae007_0/): metadata [feature server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Route_Time_of_Day/FeatureServer/0) or [map server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Route_Time_of_Day/MapServer/0) 1. 
All GTFS datasets [metadata/data dictionary](https://data.ca.gov/dataset/cal-itp-gtfs-ingest-pipeline-dataset/resource/e26bf6ee-419d-4a95-8e4c-e2b13d5de793) ## GTFS Schedule Routes & Stops Geospatial Data From 9fe9735286c24f0f5a0e5c2567e7cbbdfd0b8750 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 01:26:34 +0000 Subject: [PATCH 11/19] (remove): no longer need filtering nearest 10 to 2 vp script --- rt_segment_speeds/scripts/vp_around_stops.py | 269 ------------------- 1 file changed, 269 deletions(-) delete mode 100644 rt_segment_speeds/scripts/vp_around_stops.py diff --git a/rt_segment_speeds/scripts/vp_around_stops.py b/rt_segment_speeds/scripts/vp_around_stops.py deleted file mode 100644 index 9f76378e0..000000000 --- a/rt_segment_speeds/scripts/vp_around_stops.py +++ /dev/null @@ -1,269 +0,0 @@ -""" -Filter the nearest 10 neighbors down to the -nearest 2 neighbors for each stop position. -Attach the projected stop position against shape, -projected vp position against shape, and timestamps. -""" -import datetime -import geopandas as gpd -import pandas as pd -import sys - -from dask import delayed, compute -from loguru import logger -from pathlib import Path -from typing import Literal, Optional - -from segment_speed_utils import helpers -from shared_utils import geo_utils -from update_vars import SEGMENT_GCS, GTFS_DATA_DICT -from segment_speed_utils.project_vars import SEGMENT_TYPES, PROJECT_CRS - -def stops_projected_against_shape( - input_file: str, - analysis_date: str, - trip_stop_cols: list, -) -> pd.DataFrame: - """ - From nearest 10 vp points, project the stop geometry - onto shape geometry and get - stop_meters. 
- """ - stop_position = gpd.read_parquet( - f"{SEGMENT_GCS}{input_file}_{analysis_date}.parquet", - columns = trip_stop_cols + [ - "shape_array_key", "stop_geometry"], - ).to_crs(PROJECT_CRS) - - shapes = helpers.import_scheduled_shapes( - analysis_date, - columns = ["shape_array_key", "geometry"], - crs = PROJECT_CRS, - get_pandas = True - ) - - gdf = pd.merge( - stop_position, - shapes.rename(columns = {"geometry": "shape_geometry"}), - on = "shape_array_key", - how = "inner" - ) - - gdf = gdf.assign( - stop_meters = gdf.shape_geometry.project(gdf.stop_geometry), - )[trip_stop_cols + ["stop_meters"]] - - del shapes, stop_position - - return gdf - - -def explode_vp_nearest( - input_file: str, - analysis_date: str, - trip_stop_cols: list, -) -> pd.DataFrame: - """ - Take nearest 10 vp, which holds vp_idx as an array, - and explode it so it becomes long. - """ - vp_nearest = pd.read_parquet( - f"{SEGMENT_GCS}{input_file}_{analysis_date}.parquet", - columns = trip_stop_cols + [ - "shape_array_key", - "nearest_vp_arr"], - ).explode( - "nearest_vp_arr" - ).drop_duplicates().reset_index( - drop=True - ).rename( - columns = {"nearest_vp_arr": "vp_idx"} - ).astype({"vp_idx": "int64"}) - - return vp_nearest - - -def get_vp_projected_against_shape( - input_file: str, - analysis_date: str, - **kwargs -) -> pd.DataFrame: - """ - Put in subset of vp_idx (using the kwargs) - and turn the x, y into vp point geometry. - Merge in shapes and project the vp position - against shape geometry, and save out - shape_meters. 
- """ - # Get crosswalk of trip to shapes - trips_to_shapes = helpers.import_scheduled_trips( - analysis_date, - columns = ["trip_instance_key", "shape_array_key"], - get_pandas = True - ) - - # Get shapes - shapes = helpers.import_scheduled_shapes( - analysis_date, - columns = ["shape_array_key", "geometry"], - crs = PROJECT_CRS, - get_pandas = True - ) - - # Subset usable vp with only the ones present in exploded vp - # and turn those into vp geometry - vp = pd.read_parquet( - f"{SEGMENT_GCS}{input_file}_{analysis_date}", - columns = ["trip_instance_key", "vp_idx", "x", "y"], - **kwargs - ).pipe(geo_utils.vp_as_gdf, crs = PROJECT_CRS) - - # Merge all together so we can project vp point goem - # against shape line geom - gdf = pd.merge( - vp.rename(columns = {"geometry": "vp_geometry"}), - trips_to_shapes, - on = "trip_instance_key", - how = "inner" - ).merge( - shapes.rename(columns = {"geometry": "shape_geometry"}), - on = "shape_array_key", - how = "inner" - ).set_geometry("vp_geometry") - - del trips_to_shapes, shapes, vp - - gdf = gdf.assign( - shape_meters = gdf.shape_geometry.project(gdf.vp_geometry), - )[["vp_idx", "shape_meters"]] - - return gdf - - -def find_two_closest_vp( - df: pd.DataFrame, - group_cols: list -) -> pd.DataFrame: - """ - Based on the distances calculated between vp and stop, - keep the 2 observations that are closest. Find the smallest - positive distance and negative distance. - - This filters down the nearest 10 into nearest 2. 
- """ - positive_distances_df = df.loc[df.stop_vp_distance_meters >= 0] - negative_distances_df = df.loc[df.stop_vp_distance_meters < 0] - - #https://github.com/pandas-dev/pandas/issues/45089 - # add dropna=False or else too many combos are lost - min_pos_distance = ( - positive_distances_df - .groupby(group_cols, - observed=True, group_keys=False, dropna=False) - .agg({"stop_vp_distance_meters": "min"}) - .reset_index() - ) - - min_neg_distance = ( - negative_distances_df - .groupby(group_cols, - observed=True, group_keys=False, dropna=False) - .agg({"stop_vp_distance_meters": "max"}) - .reset_index() - ) - - two_vp = pd.concat( - [min_pos_distance, min_neg_distance], - axis=0, ignore_index=True - ) - - return two_vp - - -def filter_to_nearest_two_vp( - analysis_date: str, - segment_type: Literal[SEGMENT_TYPES], - config_path: Optional[Path] = GTFS_DATA_DICT -): - dict_inputs = config_path[segment_type] - trip_stop_cols = [*dict_inputs["trip_stop_cols"]] - USABLE_VP_FILE = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage2"] - EXPORT_FILE = dict_inputs["stage2b"] - - start = datetime.datetime.now() - - stop_meters_df = delayed(stops_projected_against_shape)( - INPUT_FILE, analysis_date, trip_stop_cols) - - vp_nearest = delayed(explode_vp_nearest)( - INPUT_FILE, analysis_date, trip_stop_cols) - - subset_vp = vp_nearest.vp_idx.unique() - - vp_meters_df = delayed(get_vp_projected_against_shape)( - USABLE_VP_FILE, - analysis_date, - filters = [[("vp_idx", "in", subset_vp)]] - ) - - gdf = delayed(pd.merge)( - vp_nearest, - stop_meters_df, - on = trip_stop_cols, - how = "inner" - ).merge( - vp_meters_df, - on = "vp_idx", - how = "inner" - ) - - # Calculate the distance between the stop and vp position - # This is used to find the minimum positive and minimum negative - # distance (get at vp before and after stop) - gdf = gdf.assign( - stop_meters = gdf.stop_meters.round(3), - shape_meters = gdf.shape_meters.round(3), - stop_vp_distance_meters = (gdf.stop_meters - 
gdf.shape_meters).round(2) - ) - - gdf_filtered = delayed(find_two_closest_vp)(gdf, trip_stop_cols) - - gdf2 = delayed(pd.merge)( - gdf, - gdf_filtered, - on = trip_stop_cols + ["stop_vp_distance_meters"], - how = "inner" - ) - - gdf2 = compute(gdf2)[0] - - del gdf, gdf_filtered, vp_nearest, stop_meters_df, vp_meters_df - - gdf2.to_parquet( - f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet", - ) - - end = datetime.datetime.now() - logger.info(f"nearest 2 vp for {segment_type} " - f"{analysis_date}: {end - start}") - - del gdf2 - - return - -''' -if __name__ == "__main__": - - from segment_speed_utils.project_vars import analysis_date_list - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] -''' \ No newline at end of file From 873a97cc80e6aacade880757160a74cb21c6c59e Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 01:31:04 +0000 Subject: [PATCH 12/19] (remove): array_utils, move into segment_calcs --- .../segment_speed_utils/array_utils.py | 37 ------------------- 1 file changed, 37 deletions(-) delete mode 100644 rt_segment_speeds/segment_speed_utils/array_utils.py diff --git a/rt_segment_speeds/segment_speed_utils/array_utils.py b/rt_segment_speeds/segment_speed_utils/array_utils.py deleted file mode 100644 index 295aae47e..000000000 --- a/rt_segment_speeds/segment_speed_utils/array_utils.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Functions for working with numpy arrays. 
-""" -import numpy as np -import pandas as pd - -from numba import jit - -def rolling_window_make_array( - df: pd.DataFrame, - window: int, - rolling_col: str -) -> pd.DataFrame: - # https://stackoverflow.com/questions/47482009/pandas-rolling-window-to-return-an-array - df[f"rolling_{rolling_col}"] = [ - np.asarray(window) for window in - df.groupby("trip_instance_key")[rolling_col].rolling( - window = window, center=True) - ] - - is_monotonic_series = np.vectorize(monotonic_check)(df[f"rolling_{rolling_col}"]) - df[f"{rolling_col}_monotonic"] = is_monotonic_series - - return df - -@jit(nopython=True) -def monotonic_check(arr: np.ndarray) -> bool: - """ - For an array, check if it's monotonically increasing. - https://stackoverflow.com/questions/4983258/check-list-monotonicity - """ - diff_arr = np.diff(arr) - - if np.all(diff_arr > 0): - return True - else: - return False \ No newline at end of file From 8255c381d4c4eca68af3404540923cbaa0026edd Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 01:31:37 +0000 Subject: [PATCH 13/19] (segment_speed_utils): remove unused functions --- .../segment_speed_utils/segment_calcs.py | 31 ++++++++++++++++++- .../segment_speed_utils/vp_transform.py | 17 +--------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/rt_segment_speeds/segment_speed_utils/segment_calcs.py b/rt_segment_speeds/segment_speed_utils/segment_calcs.py index 9bfac4613..004490ece 100644 --- a/rt_segment_speeds/segment_speed_utils/segment_calcs.py +++ b/rt_segment_speeds/segment_speed_utils/segment_calcs.py @@ -1,3 +1,6 @@ +""" +Functions related to calculating segment speeds. 
+""" import dask.dataframe as dd import dask_geopandas as dg import geopandas as gpd @@ -149,4 +152,30 @@ def interpolate_stop_arrival_time( return np.interp( stop_position, np.asarray(shape_meters_arr), timestamp_arr - ).astype("datetime64[s]") \ No newline at end of file + ).astype("datetime64[s]") + + +def rolling_window_make_array( + df: pd.DataFrame, + window: int, + rolling_col: str +) -> pd.DataFrame: + """ + Interpolated stop arrival times are checked + to see if they are monotonically increasing. + If it isn't, it gets recalculated based on + stop_meters (the stop's position) relative to + other stop arrival times. + + https://stackoverflow.com/questions/47482009/pandas-rolling-window-to-return-an-array + """ + df[f"rolling_{rolling_col}"] = [ + np.asarray(window) for window in + df.groupby("trip_instance_key")[rolling_col].rolling( + window = window, center=True) + ] + + is_monotonic_series = np.vectorize(monotonic_check)(df[f"rolling_{rolling_col}"]) + df[f"{rolling_col}_monotonic"] = is_monotonic_series + + return df \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/vp_transform.py b/rt_segment_speeds/segment_speed_utils/vp_transform.py index 48694b585..bc41a9a90 100644 --- a/rt_segment_speeds/segment_speed_utils/vp_transform.py +++ b/rt_segment_speeds/segment_speed_utils/vp_transform.py @@ -53,19 +53,4 @@ def condense_point_geom_to_line( .reset_index() ) - return df3 - - -def sort_by_vp_idx_order( - vp_idx_array: np.ndarray, - geometry_array: np.ndarray, - timestamp_array: np.ndarray, -) -> tuple[np.ndarray]: - - sort_order = np.argsort(vp_idx_array, axis=0) - - vp_sorted = np.take_along_axis(vp_idx_array, sort_order, axis=0) - geom_sorted = np.take_along_axis(geometry_array, sort_order, axis=0) - timestamp_sorted = np.take_along_axis(timestamp_array, sort_order, axis=0) - - return vp_sorted, geom_sorted, timestamp_sorted \ No newline at end of file + return df3 \ No newline at end of file From 
d17d8b309507f3447c7746d20e4af8ca30ac89d1 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 01:31:55 +0000 Subject: [PATCH 14/19] update init after array_utils removed --- rt_segment_speeds/segment_speed_utils/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/rt_segment_speeds/segment_speed_utils/__init__.py b/rt_segment_speeds/segment_speed_utils/__init__.py index 4d10f7d0e..d7e66563e 100644 --- a/rt_segment_speeds/segment_speed_utils/__init__.py +++ b/rt_segment_speeds/segment_speed_utils/__init__.py @@ -1,5 +1,4 @@ from . import ( - array_utils, gtfs_schedule_wrangling, helpers, metrics, @@ -12,7 +11,6 @@ ) __all__ = [ - "array_utils", "gtfs_schedule_wrangling", "helpers", "metrics", From bc6469835495caade0d52dea926bfdf0ab74ca58 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 16:17:55 +0000 Subject: [PATCH 15/19] rename functions for clarity, test sep-nov2024 dates for nearest vp step --- .../scripts/interpolate_stop_arrival.py | 5 +- .../scripts/nearest_vp_to_stop.py | 3 +- .../scripts/pipeline_rt_stop_times.py | 5 +- .../scripts/pipeline_segment_speeds.py | 7 +-- .../scripts/pipeline_speedmap.py | 7 +-- .../segment_speed_utils/neighbor.py | 54 +++++++++---------- .../segment_speed_utils/segment_calcs.py | 14 +++++ 7 files changed, 54 insertions(+), 41 deletions(-) diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index fafb920b8..0495893e9 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -17,8 +17,7 @@ from pathlib import Path from typing import Literal, Optional -from segment_speed_utils import (array_utils, helpers, - segment_calcs) +from segment_speed_utils import helpers, segment_calcs from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from segment_speed_utils.project_vars import PROJECT_CRS, SEGMENT_TYPES from shared_utils import rt_dates @@ -166,7 +165,7 
@@ def enforce_monotonicity_and_interpolate_across_stops( df = segment_calcs.convert_timestamp_to_seconds( df, ["arrival_time"]) - df = array_utils.rolling_window_make_array( + df = segment_calcs.rolling_window_make_array( df, window = 3, rolling_col = "arrival_time_sec" ) diff --git a/rt_segment_speeds/scripts/nearest_vp_to_stop.py b/rt_segment_speeds/scripts/nearest_vp_to_stop.py index 2ea13aae0..6706637b4 100644 --- a/rt_segment_speeds/scripts/nearest_vp_to_stop.py +++ b/rt_segment_speeds/scripts/nearest_vp_to_stop.py @@ -4,6 +4,7 @@ """ import datetime import geopandas as gpd +import numpy as np import pandas as pd import sys @@ -158,7 +159,7 @@ def nearest_neighbor_for_stop( gdf = neighbor.merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( - neighbor.subset_arrays_to_valid_directions + neighbor.two_nearest_neighbor_near_stop )( gdf.vp_primary_direction, gdf.vp_geometry, diff --git a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py index a37199c96..39cc7567e 100644 --- a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py +++ b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py @@ -38,7 +38,7 @@ logger.remove() - + LOG_FILE = "../logs/interpolate_stop_arrival.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -57,7 +57,7 @@ logger.remove() - + LOG_FILE = "../logs/speeds_by_segment_trip.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -75,3 +75,4 @@ [compute(i)[0] for i in delayed_dfs] logger.remove() + \ No newline at end of file diff --git a/rt_segment_speeds/scripts/pipeline_segment_speeds.py b/rt_segment_speeds/scripts/pipeline_segment_speeds.py index fe8084eba..fa7922c14 100644 --- a/rt_segment_speeds/scripts/pipeline_segment_speeds.py +++ b/rt_segment_speeds/scripts/pipeline_segment_speeds.py @@ -17,7 +17,7 @@ if __name__ == "__main__": from 
segment_speed_utils.project_vars import analysis_date_list - + segment_type = "stop_segments" print(f"segment_type: {segment_type}") @@ -59,7 +59,7 @@ logger.remove() - + LOG_FILE = "../logs/speeds_by_segment_trip.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -76,4 +76,5 @@ [compute(i)[0] for i in delayed_dfs] - logger.remove() \ No newline at end of file + logger.remove() + \ No newline at end of file diff --git a/rt_segment_speeds/scripts/pipeline_speedmap.py b/rt_segment_speeds/scripts/pipeline_speedmap.py index 293db2e44..86997ebfe 100644 --- a/rt_segment_speeds/scripts/pipeline_speedmap.py +++ b/rt_segment_speeds/scripts/pipeline_speedmap.py @@ -90,7 +90,7 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( logger.remove() - + LOG_FILE = "../logs/interpolate_stop_arrival.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -109,7 +109,7 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( logger.remove() - + t0 = datetime.datetime.now() delayed_dfs = [ delayed(concatenate_speedmap_proxy_arrivals_with_remaining)( @@ -139,4 +139,5 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( [compute(i)[0] for i in delayed_dfs] - logger.remove() \ No newline at end of file + logger.remove() + \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index f5d21ce84..2e3f9a307 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -15,7 +15,7 @@ def merge_stop_vp_for_nearest_neighbor( stop_times: gpd.GeoDataFrame, analysis_date: str, **kwargs -): +) -> gpd.GeoDataFrame: """ Merge stop times file with vp. 
vp gdf has been condensed so that all the vp coords @@ -64,7 +64,7 @@ def merge_stop_vp_for_nearest_neighbor( def find_nearest_points( - vp_coords_line: np.ndarray, + vp_coords_array: np.ndarray, target_stop: shapely.Point, vp_idx_array: np.ndarray, ) -> np.ndarray: @@ -79,7 +79,7 @@ def find_nearest_points( (ex: nearest 5 vp to each stop). """ indices = geo_utils.nearest_snap( - vp_coords_line, + vp_coords_array, target_stop, k_neighbors = 5 ) @@ -89,17 +89,17 @@ def find_nearest_points( # if we want 10 nearest neighbors and 8th, 9th, 10th are all # the same result, the 8th will have a result, then 9th and 10th will # return the length of the array (which is out-of-bounds) + # using vp_coords_array keeps too many points (is this because coords can be dupes?) indices2 = indices[indices < vp_idx_array.size] return indices2 def filter_to_nearest2_vp( - vp_coords_line: np.ndarray, + nearest_vp_coords_array: np.ndarray, shape_geometry: shapely.LineString, - vp_idx_array: np.ndarray, + nearest_vp_idx_array: np.ndarray, stop_meters: float, - indices_of_nearest: np.ndarray, ) -> tuple[np.ndarray]: """ Take the indices that are the nearest. @@ -109,16 +109,11 @@ def filter_to_nearest2_vp( Filter down to the nearest 2 vp before and after a stop. If there isn't one before or after, a value of -1 is returned. """ - # Subset the array of vp coords and vp_idx_array with - # the indices that show the nearest k neighbors. 
- nearest_vp = vp_coords_line[indices_of_nearest] - nearest_vp_idx = vp_idx_array[indices_of_nearest] - # Project these vp coords to shape geometry and see how far it is # from the stop's position on the shape nearest_vp_projected = np.asarray( [shape_geometry.project(shapely.Point(i)) - for i in nearest_vp] + for i in nearest_vp_coords_array] ) # Negative values are before the stop @@ -126,26 +121,28 @@ def filter_to_nearest2_vp( before_indices = np.where(nearest_vp_projected - stop_meters < 0)[0] after_indices = np.where(nearest_vp_projected - stop_meters > 0)[0] + # Set missing values when we're not able to find a nearest neighbor result + # use -1 as vp_idx (since this is not present in vp_usable) + # and zeroes for meters + before_idx = -1 + after_idx = -1 + before_vp_meters = 0 + after_vp_meters = 0 + # Grab the closest vp before a stop (-1 means array was empty) if before_indices.size > 0: - before_idx = nearest_vp_idx[before_indices][-1] + before_idx = nearest_vp_idx_array[before_indices][-1] before_vp_meters = nearest_vp_projected[before_indices][-1] - else: - before_idx = -1 - before_vp_meters = 0 - + # Grab the closest vp after a stop (-1 means array was empty) if after_indices.size > 0: - after_idx = nearest_vp_idx[after_indices][0] + after_idx = nearest_vp_idx_array[after_indices][0] after_vp_meters = nearest_vp_projected[after_indices][0] - else: - after_idx = -1 - after_vp_meters = 0 return before_idx, after_idx, before_vp_meters, after_vp_meters -def subset_arrays_to_valid_directions( +def two_nearest_neighbor_near_stop( vp_direction_array: np.ndarray, vp_geometry: shapely.LineString, vp_idx_array: np.ndarray, @@ -170,23 +167,22 @@ def subset_arrays_to_valid_directions( valid_indices = (vp_direction_array != opposite_direction).nonzero() # These are vp coords where index values of opposite direction is excluded - valid_vp_coords_line = np.array(vp_geometry.coords)[valid_indices] + valid_vp_coords_array = np.array(vp_geometry.coords)[valid_indices] # 
These are the subset of vp_idx values where opposite direction is excluded - valid_vp_idx_arr = np.asarray(vp_idx_array)[valid_indices] + valid_vp_idx_array = np.asarray(vp_idx_array)[valid_indices] nearest_indices = find_nearest_points( - valid_vp_coords_line, + valid_vp_coords_array, stop_geometry, - valid_vp_idx_arr, + valid_vp_idx_array, ) before_vp, after_vp, before_meters, after_meters = filter_to_nearest2_vp( - valid_vp_coords_line, + valid_vp_idx_array[nearest_indices], # subset of coords in nn shape_geometry, - valid_vp_idx_arr, + valid_vp_idx_array[nearest_indices], # subset of vp_idx in nn stop_meters, - nearest_indices, ) return before_vp, after_vp, before_meters, after_meters \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/segment_calcs.py b/rt_segment_speeds/segment_speed_utils/segment_calcs.py index 004490ece..4fb21c5b8 100644 --- a/rt_segment_speeds/segment_speed_utils/segment_calcs.py +++ b/rt_segment_speeds/segment_speed_utils/segment_calcs.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +from numba import jit from typing import Union from shared_utils.rt_utils import MPH_PER_MPS @@ -155,6 +156,19 @@ def interpolate_stop_arrival_time( ).astype("datetime64[s]") +@jit(nopython=True) +def monotonic_check(arr: np.ndarray) -> bool: + """ + For an array, check if it's monotonically increasing. + https://stackoverflow.com/questions/4983258/check-list-monotonicity + """ + diff_arr = np.diff(arr) + + if np.all(diff_arr > 0): + return True + else: + return False + def rolling_window_make_array( df: pd.DataFrame, window: int, From 6a6ab6827f2b421b0fc70f86959acd6d1e5dd1f7 Mon Sep 17 00:00:00 2001 From: csuyat-dot Date: Thu, 19 Dec 2024 21:35:49 +0000 Subject: [PATCH 16/19] updated cover sheet template. started explore NB to update the save rtpa outputs function. mainly, changes to the excel writer portion so it only writes data related to the RTPA, not the entire dataset. 
--- ntd/cover_sheet_template.xlsx | Bin 12444 -> 12439 bytes ntd/explore_save_rtpa_outputs_update.ipynb | 307 +++++++++++++++++++++ 2 files changed, 307 insertions(+) create mode 100644 ntd/explore_save_rtpa_outputs_update.ipynb diff --git a/ntd/cover_sheet_template.xlsx b/ntd/cover_sheet_template.xlsx index df54954fee4f761f4e44f17d4da8d6c5dc9e087c..03cb9c6ee6cb7b2e473df2029bc836a23b62426b 100644 GIT binary patch delta 6000 zcmZ8_bx_>Rvi9P^B@iS?a0s@+f&{l9yFdu;?(X^nf(CaF?(Q03aS2XvC-{aC+_}8x z+s-I<0dhBZs5fbWG9g&z_KH2MMr!Ucgq9yT2A_D^w)vNfYa(@qBco{UTj@Tj{hok8txHrBKZnL0-+Wgkr*V<>cRE`!z-n!} z^-^WnRvX@+#@koago6erbte1}4`J?xByBt~YnlK*{o1PR$@F`YmXNUGTrQ_I7tr5s zD+YaOC0*;&ll$78zz?%{m#CggfOI!;FRmtdVMf+KfUnH3bwh|_2VFEndm8O8?Wp9w z1O{&?KOfF84~nl@S%|k9A`nlsX1w_xZ1TWKhG-e3g--36EqQjiV)FoRFyZ;WD5p-V zdzHjp_NlSzXF-Z3jE-%$4OhOih>EvwQVOXYW@WrC*5=uia+WlSp%tGCB(vb1BI9NS zI2w45ybP)>p(kRCVHfGGC|nS)C)-f*)t`4ja)5Kj`ahYsEi|{uwj@yT=hmxS9aK6~>@~ToLR2GO#?C)_gpgR3 z+S+MqpIqp?kHr@I?xRyBv(3h0f--`^O03i54Jch|AQ=BLZbZiha#F`G=4WGc-n)_0 zo=_}15WnN4AxDwk%GU&;Ye+_zn3#0tbL-`b+GaMoCHnr@B$%)Ixr0I6ls=$vl>XwU z*j5LLsU37;w_sfC$cl^t$z(AABg^iW6vrT`$$&qtQ)RF{N|j+@w0TQ?H?J9cP1I^1 zREX*;p=a60Lm|9JfU0@3a7 zL{fGPR@$#M)*iCE9>Q{4v!iQ6WhbMGYf@a!KIsJfqSQ|5b>R0UGUoUbI zdQJ0AJH2n$$*ZYv(y@ZZ6T9MOHWW3_e+R-OH_Z2!G(xplwVWx5)A~pqyd3QKnW4-WI*POd`t!y547Q3 z_=q;@4zA`y2(G9z%|EU@JcE)d+&@;C|fl7^-9$+sh`R9%?)=Npq>@u{h~OX&*@Z&`>R1DQ}57j zmep`B^O*f`i&Gb=Lj0|HK($F(E_?X>AOCK7&Rh33QOx>&fWVNb(6 z9M#n3NIhgSa?^C5`77vexr_Fbu0ke0tzLnCxB6R8eYwAID}cUQ;}#mJ_fAB&h;OK^ zudCb!GGl^|vRHhf5=&2nO*!)xcQ+WD8|34mmYE`*m1TpW(8Vjg@n^bave4?R#71I_lPK3U4~Z6?LN91ZtNA!K!J z!&PZ7a$3I{Xu1B(-d_=yErU}Tobf@H=F~CHHjnM8Co=G!iX6ADi8>?hHfx?zFMa7d zFzM(GCJ6NO^qiBQo)XhGqnE&VLHZ~=2pF<(5RsSwz~_N08Fzc6MNw>hcL{+LEt0Ok zg?EiS^etFVz>8K_fi*j->XevKzpKZiiLLL|hgObHzMk#4!0no9M3-xePS?}*l~=pI zy*-t0TH~YKPt)#ZLX4tp&brtm^BZ%7!$;HL6&zas-J9eNvD?er*^q& z(|9K^8TI(eQ5Jk0Y##Y~d<$JXH~TQn8$oToD8jl8+b_Pbb7XpKR=g5*XF57d;psUa 
zq1Ba={w(Y9Z!>Le?u7-{G3iNEz+~FkZ1(bhuUKC9XcFmZdGL~|zKaDZ^-EAk{!aD2}2v>@jKRvTQL7wX(B z_ezU%o7}cK+${TtpcV2gP_e+JC8@Ke!YG>d5_VheU{Eyvl4Si7I0!g+I^y@VjbVs7M^a!(`(2gbB** zT>WT=+c(La-TsYK(jN-Ic8{yoSj65eq9f&9$U)l+_%N1-Q|>9kTh+sY9M@<`)$byylVPQ@}ynt7ZEaA!=V2PW9YH z>`RD!I2)@1mtXwWRbe@E=t^SfZz1VXp3nyH7Ya%RW0$av!;$b~7QDS;HmtDMA=vD_ z3}pT9SEz%gwkq*eVWx+Y`Pa$5^pUUD$tY-A6tXPd3N|wuHh9$Z=@#L%9`s}pGfrTw zv>G06aJuFlj01}rJ>6NaRm2>4b|qC7`NxAide}*E^ zQn~~-euYfrvuqMTwmuNOK&oCPaHrS#u};_qf6d0qn0eF=}6wEVzw; zed@ED9qaQG+FMmY5jXYXDMEy%6VIOOvi$>k0H;NG951V~NbcRW)qDCsH2F_>+Tf!e zuY7FTmur#S9zNQF?EtC)vG0od|540X@Ae};7_}}7ZD{A5s_qHlKdIz%j2N^Jj3re~a07y98%UQe z$syTj)TF-*>a(Ru7jVLg}38c!XMj?%%lvN5cL&$oVNfI?msl1i9+{SN>Gbcz}6kNN`r`XW;CSXov zuCV5O;YdHKR?%!gz1LZzUdtgK;qVaaYcD)Tlj-+wq>}dpxgK=-gt5&oXjU}ww1fk0 zS6@%#2pzi{?{ySVKT1VH@RT~w*FV1+b;ck=;vH#6(on1U_Or8JXwtZ^0_Rj_MQP}bccIs}GLoH8Z zpp{Oex>O|lyTI4An{MttFmfgya`#6NNjB`Zu5qk^1J;?kcYMDhq;uaHhq%hA;#^5U zo_Ry^F6k(+Ts5$3&14NZf9aE|QZAAFN%BwN+V}PnqO)9SFmFA;iugj6(Zkl3hqhpR zUSaxHL0thRQpg*xwUAC!%F=70+@4bKCihQFc!AC)jv>QY7ed^u&-!P*DpM&oeJ9xO zrv9FfyY=Ns(;@6AK}*QA>G=#K8F)Nqdj*YaXSAMnE|v1$s4bEy^b?B@4#g&N7Ji}t z;;$5H6#T!v`75u_#0&7>!IK4V9mrC&7Rh$J@@)cXU(D*l;O*1yZ=C^z-iK1|Dj-Mu zZ8w2I{6Oogr8k(fu(8HhQuIeq9VJ*wNV_q8l8%j_^F9{a@&o_y=5bbB3tyYXw8sJT z;k|zG$QIw1r|>0xyStLj_*Ojc5s5*7on)HCtCusMb0Npix4zO}8^w0TarWC???F=T z@|#Fhj@UWs$ubB+T`dc*x_TCK6bcJltlhQM?{8}AC+)Ly;?{+Zumd@kw>nwywSD4P z=y*_z+w^JWhaNk*0$$F9%d(H)u*CA3RRV&86bij&$7_iO9PbxL5pT(0^r_LX>}JkAt#e7+pH>7w$)xCEbdVi~=8&%m!TsWxBBJol_rsi>5X{k^NHl1v zQqHlE^~O5adL@<;Z;ozxm|PqFrP-5tAnuW3#l~xN--Ybk9{WaHY62e(z~zuKG7GmP zG=|6L&t^Dp2;=qM&i$&3V_NW9U$$b$6n{jACAYrE)V@)thKY*EIftJGHbb5I598k; zbSHvHvqt~YvGd_em_C&aI;Bn9iFWOmn$TLs-r6T>3?B}`D>;hFap9<`l3n%Kw&t_(*1Y4Ebq{5jS2ft4^*q8paFNE8xLG_#3%%QFmKJIOw zqbpb=FMcKOj!%hOSxQN(6@q$+w>fFjtKyK88^RJ3tSE89n3QT=yF1q={c2$E`UD(BSNt@e+ zk$dC`hfbC$lecdll&EtRc=t|*d?ooEYdbQeM%^ney6MYIkt3)2j-4u}XuSGVK8TJh0;z>;ga@#JLg% zp%~{?-H?^FR^lA83}iYj=uNEd}j^)GR{BY)Bu*lu+uuiceW 
ztWJZ!BBY(%&av7~jn6%m$&9q?gA!Y4Lev7?K2d+ocz;8HWJrXI{LO1H>^y1@(WFT+ z#7KgF@U{Pw4%DGv%a7VW;pFvxqlS|+yl6kiR75HQr6`}=_od-P2w=TxcWzL$Z>gJy z7@FE~#<}jL8e{qdX_$^t8@NakYjDCpXpJmeIl(SJR`dT%#5`x5^Z zceErVv&WjKh9|8~3aCKs@cc!8jo3h?t!G`+)|yQ%4PR#hlLXJLh}`H-@Gr;?r(TsmdTSS+n6-VF4+_#qP`TuyEk8_=Z?=Mn~I_f{?ymTVa0 zBA14P^;Gn&Lc?=QPn8yw({N~vgao!Qu*bLGHP~Z{(uy!f1JQ?SloY<*y(VTqWPBJ> zTNx1Sn8}#1MoG%yZL$SzgevRC8I6beyk^CWkRIQJq*ffrVopUf2#cd9gM^Z&OMr5O zyRiovNSbdL)~jubA_`NFvf=!V68~#sGa$FyJPIK?o(feo;j{DnW3O7yJowky2lKC- z+oeI8))`!Cx?K~?FR~qX8B{{LxHMy?Fe1O#S>th5P+a~WyLRG!r*ntJ#N87-6wDxzQF*S_p}9PwvzNJk_-r^qIF*}TOm{Oou|b^T*-3K2s!bu zWMGSLx_Wd9{T0(E6)Y%A7~pTTvM;%k^0FG=4lLAAB+e>iN(eZr902iJ?z7qzOm{ij z-G+E88w9L7v!7NFeR=Sv@P`-uKYQ!CfX2auN`Vci3FxqOJtML~2M3A+f)J_h%ws*J;=oduOio-ZN|)ruvn@(R0Aea)yixHfR4 zeMjZnMqIA0nOKys3)Fl4#m3bb71y5d6;!<(z-rkG9Pq77I3;6CzQHgx#M8TjRZ>ZN zlJre+N9SGf4#eE>dH~9lz4tP7xr%0i9LO4xW=MZ@PudjdZtUM7xApHL`_}7k}zzr5g PNJwx7)1qp#{iFIHVRL6s delta 6036 zcmZvAbx_>R((VEY?hxGF7M9=^NN|S`AZTz~+-><`L4pKWJh%jRUo5Z$2^I+ME&&3; zCBT>WoO|y%_0{cvrn;W#?wXmN?x#ArUS?F9ft-p)xl}$Y--Xkh;Oc>N7OmJmTy|ek z*{~_==>Bjl?u)GxdvUemQ9a&$h|mjQGS-^m^YpaYr`qS%`w`Ue9WZ*PPzrsu(bW$= z0ES)xY_A^Oe;@Ge8uioDQv^(b^2l*I6WycKb>86Z*NR%A?VDSV`bM&<-Sm^9vA2@p zG1f!X7wZEXkd3pp;pw@dJu^_l#8_QRMSrm{*GUVkb9q?|H<^QSoi*_h`hlL(w<4fE zG^et_92u{wij86QanT?<#zWfEM%qYefc_!zX&6itEA<7<^uz~&o|#kJywQyF$m>LF zrgPq>?H|QN@rH!N{0hn=6!n%KS-2mgw!1?a4@xEom7tRgICqpS35!}MW%ZwYV)Zb0X+kZwDvd^&Ge#5B7E`gT|0*AGro@70Csp_ zFox}QkX3*KJ0`O|>$+5`)sR1y_|m{dL*}R2G(5ETP?4?iM0eDFRM^W($OczG!3Nc^ z0=1untyb_h*OE$nfVt(rPLR$rA1=n4TA*$@fcN0C>Ti${Z7yr>s;Db<*j;60wFuY7!*!0iO|-1 z5nDhbDw-lo7a$ssWzZYRx?*Aa$>lIi+W$hD0#m=7i!Iq&WBJeBF>5I9>9tuBbV%qL zmH|V{U)o?#P!IO3T4_m9B~*CZ^~8D0*VKa2+=e{D=nEm4w^{2Tb|nko5#~twCm7O7 zuaEn&ejchF!s|&Mer3LoeT>gN_(9RIo7h)*nc*Uo^n?~C0`+;u4(8bTk6vsOf}5YB zn7Ure5-QCbx=jw{N5AafHGreUV&Qy7^324On zo$A^~dwhNgh+mU#u^g^|BT_xJEAiIe$_fR$P;eA3IK-y1L#?3`hT;4}=YU;@m(rMj zp~~UjHM=zO8-G#Oz)yxC15GwTg4NI>Zr1{THEL$7`NR*|hg%|yCG6?JsES*L%4YJ_ 
z=G*bMt>#wL`8bYg&NjQI3ATPe%ecynxE}uA+}77?*L%*?HL*5eqt!oL^(>-Nhet-s zL*~mBczy~;XBg#!mT3a%2Oeg1*z8Gt?{Cy(ut$wAYenxnpk61U9Q+0v%6hF?ivE1m zQi$xXWgzX&>CU3_%rp>Q)^XhZ&=S`|2vgNqGG5AVb-THD@bcPNak(k);L}*nGA=f7 zzkS%+VY{(Rc?=>F?fo+lo~hF_EjlzMIk>gK*Wik+gbMVZW-21kK_FX35QySGWx>PF z+S=2D?_Uf5KlLFCz8L=rLKtz(eL>5!ih+hgT#%q!?>Lp>uYKWrl+IWe%HNa?FYgM# zn4>9D(rqMOLy=sSM*dODjW1^kfav`cFAUk$L2df}zH*lG{x?jFLdSx(1_v zs+wyN~ZS6P?m{63tM z7rH>gwnG%btIEkRYf+NtE!8LUm3GEOq`uBJllbErsmP*<0HL8!)Y~BygbqfrT=dWZ zVkV-7>+p_3&N!j4hq=$gHx<~=S>@#9jBQ4E(j^(ZJA>VEs#21ZPr@Dv%tNCn=Z3w5 z_1|1~C52x1V%OBJOPCKmoeAO*&)1WxU8aFKP=Q8~MOVH_jy9*gvnunCJqMf)5U zzQQfOY;~My66ba%^l!0aVMEWOIX@Xb=*}gPCvPwPqq8JIjbDIiq|qjJ}GzQ z1i3QRBKWG&byqWz@QviKXM2Xpm%XjU*d~mqHF86jSF*FeEnDcU)`cnYbX-WU_g%wD z4>YPs_zgq6ROfp+i2#X%#@lHZf|Wcee8stS*UpW8HyYMWYP#lIdUTCyK2FN`LLBys z&_T`GNDu97$ic!51_<={_%~A?ACoi8;}>Lb-`9k?2fs?nc)}IhSi4Ra#dI;-Y^b+* zd8sZ*X4!4Pef@4b=-a2?bK?r_-H#p1FHHI9(~uAMv$@lETEO)5&xgBH@7#xr&I&!? zz_l&t@%QnCclXN5iwK9as_1ERAbUQIlCHejQ2o&@w;(riH0a)MuBJwfhJ+us_cHu8 zq>_C!(sh$4XFdPJX+voiVOiGu{qqvJ^XW{2yAL5mXtqRJk-4u=0%vR}@Cg(p*yQNC zCZeTSEj|%^3<3TP6T~BPZG20^dC?Vcv*cy=HO8+zCp82rJ^3LPYdokbJ}L)iABB*J zzB{rUCPpS!3JQK#G8C`I6^};B8LufzT2{Fk?T_!13Elj0V<|*qg2nG=&hi8*=}6I- z(Z`tbJP+cA8*ThcKbHskzD83Pnc)Y)kH$z~B;Ys848QLH1?S>ydbp)B;rr=R{yfSD% z{8E(ZogLtJ$&sx+8xTv6}&kD znE>xtEOARSe%momOo^95^loy!Z`k3&_AbL4#hqSu zF6Z)u<42{dOWQ8$`!2M%)B@$xxM5He7@#4@V;T|CSWU-f*sdQrHWZ6UG0(25h>S*e zYt*nQ^0i!EnnoUxOU2F%#S)8DHh5h?CR{!Kfm+Q?t11$rBMf_aEk0)eTKl=))on|k zGbGfz2i)h$r&H3)Tu)IQE@GjO7W+M}-vtO_=C3{q_89s|VyQQ;J66`B%#?ugS?a>p zu!^eanNuk;5rNm8O?@U#w3BnX(E}Sc3Lh{m`7o=S*$h`D&AbE(`{HJ>|IuPx+&Ik@ zrrUMht0!nC?NaSsTm=ORSC$}+YgD6h$k7;g%}t|dBbQ(9jR^`39XDhmOg71_bwp3o z28i%P_$E#LjcfZ%=*U$*0*hqRe>Lcv08c-fzXw~2N>8-^7%?Q5h9tt<5H(EKhB@;T zOl-T#g^9+K{o^D`;a||+(Fli|W$Aw@$n}EZ^{AN*iAH^!x$ zsE3#_53O?~{=nbY#va>tty|jg-g6x(1zw1tt9pvjOo$)hiPH#tTa3$H$o}glLPXK+ zZ?weAzkC0NUf>o`S=u}M|10Ao{{_(81!g~HUeo`-BJn2nC3de2u6zx#J4ixTEyj-KRD|K{10e)W2UYzOU?&7 
z6_0MIJevJuR??KLj>zHpXrv7&qyPp$VqK37`^$uDa6urvf44jjPd`U%kAM1HpT3F9 z4+tqtaYOoT(0G=Wx_t#pRCP@F6HI&AjcN6)nO^$D&3Mv2U{{>piERc?fru{-<^c<= z9`6)!{)0$FUFO&Es3N8@zNX+$IS8##50zfFS-~w_c&)3I|4j8HXBxQnx;>RQUhFgz zG3w_*#nff@g*r7c6pgj0P2RrJSL^xonpzpGHX1RNV%}J~%*3g*cc;U|soyI5cJPmn z42#e^@02vCf%fpMv1_|a9VF>y<4D0?b{4f%xF>w46zemaTpXpaeO31%v!AnPm1+P= zt6UoE_tKVs#nPEYcH;r+e5}xj_Z5hTCBze*4zd31IAFC=#ih@7MXg!yP7oN$`TgSd)rQ`FB!y5h#&G-KJpk( zB}d{tWKEk373i{FRwK`;vU?HXX7uqVPxK=HD4fJ4RfcI=+9@uRIyJ4YniLp`MYc@0TzHdgzx- zv|^R2eT?5$)7iNP8wd_G@3+5h{$)BVJcZ=VQ|(bfkX&A#JntF!9iVrT9WH*@`QlW< z$tdax{lM`Okj9mckYb=;nLhCgVPBxkSv#H`)<2u}BJ&pj#Ln2#cusdF+_&Z7726Ie zqxavtZ}|}>4_}w-J8@+tu-IY@zu--s_grqd#5X_r$VPkLJdbk3f8{57u__mGY@HUg zrvBM;XY_MgVw(KR)=Z7mS#7eoR>kp-uv->P#9}2gH9eRoP0dxvPKZOWIaCP(W5ImR z@iP8Va55M4&nLx3sly9N_)AF}Ns{QPfTFjf&eS4=UF3TbxSnOWqN$st@e2#iZy7BXsfkzo`ffj@;_rs83-qYuO=srBxN;1QwB*g(X*e+JGvK3Zz^(T z!TnjmQ>BxMweA58YWaN=*&`kg2Lj5bB?2^>;xY7(SHbN1{l&CxIHwEWN- zWy)7|?J^|l#74-Mo(`)tfr+*WB!p!}RB~xDhuVbvctv7laklmLCezV8f|?9KtX)i9 z%+}T$6SefJX1EsK>Eq*GKOQC2_r8{l`hoAW>vFN~b+MPnECCjvBQH^f!s2~gvn^0Votc=m}GoY$aFq@Wq0vZAODLD z(QOoy*%d>c{|I4yJc%nk?)$zQbr}V8Floo7Ls?R$kEbpVNHy57{{-rHyl;{R|DqBY zY}pEeG`slK=;#5c6kWqC_(sGbCYOmgCu`@oW)e{md=qGRJ>%*n^&*;%XWo~G1Bw$T>RD^;R?J|-sV=x1{ShbNoE zdrs0x7^1*c7-;bf%#!?lB84dIToi=MKxdU~m)U}va1~j$Abj+T7`#-=`^XSc2~pgu zs^{mq*9Ssw1`GloZJzI9*3zz>H$6YW?Baq4?e^kM%2L3ZkLNbqSj!v-*)_ozSf@MP z4#`)Nrmp7sSx=$LTHF+U3wj_PRT7aoCBzd3WL#?va=st2qodF!(Z#YI5%QIBy|)>w z{IQYfX9gk4f`*#bfPnc`!D~N`%_BLxkDM%f>wpK>4=!`Z&;YXof7kH2#tpIa^#C^2 zo~Mcp{)0L&GZply+0AoU1RMKY%pMVaJEFb3tYfX2P8gkOxe$EYfuWr1*V*-I%W~Lg-sRmHCXVGJ zTgWHQV8cO&H^~I39LEI?vnTZ1092xpE+DnPsSryRLvi#@}$^C6*AK!J& zPoxzjD<(wRBGW<@wHLnSD5Hmw;sY9kY-t4xQr(fz1Acg=uO7v}b28g9s_D#Yl(bq3 z+jfe!el*1GM^v+W&3&!Cf&D=Q&3WpYFfN+#%h-=lnq!x8>LSDhr;Np7I*AuBxsFpm z8pJ9_cxQ-B+h{mlbv^rX{7Kp-==M;;r0RixEgnzh*3fpF)tIN@nF<WM8|3_%?;1RrXsCcAs0@7#PTB<0hU=S$?9fbK8X9WFk`u~5eK%iJS zE*}HUf9MD3uhoBxAU#}(PX_gp6^_fs3a{bgLcL~#>> .groupby(..., group_keys=False)\n", + "\n", + "To adopt 
the future behavior and silence this warning, use \n", + "\n", + "\t>>> .groupby(..., group_keys=True)\n", + " previous_y_m_upt = (df.sort_values(sort_cols2)\n" + ] + } + ], + "source": [ + "df = produce_ntd_monthly_ridership_by_rtpa(YEAR, MONTH)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "32072e6c-73f8-4aaa-9340-e326e2a48942", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 95900 entries, 0 to 95899\n", + "Data columns (total 29 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 key 95900 non-null object \n", + " 1 ntd_id 95900 non-null object \n", + " 2 legacy_ntd_id 90420 non-null object \n", + " 3 agency 95900 non-null object \n", + " 4 reporter_type 95900 non-null object \n", + " 5 period_year_month 95900 non-null object \n", + " 6 period_year 95900 non-null int64 \n", + " 7 period_month 95900 non-null int64 \n", + " 8 uza_name 95900 non-null object \n", + " 9 primary_uza_code 95900 non-null object \n", + " 10 _3_mode 95900 non-null object \n", + " 11 mode 95900 non-null object \n", + " 12 mode_name 95900 non-null object \n", + " 13 service_type 95900 non-null object \n", + " 14 Status 95900 non-null object \n", + " 15 tos 95900 non-null object \n", + " 16 upt 52124 non-null float64 \n", + " 17 vrm 52282 non-null float64 \n", + " 18 vrh 52282 non-null float64 \n", + " 19 voms 52486 non-null float64 \n", + " 20 _dt 95900 non-null object \n", + " 21 execution_ts 95900 non-null datetime64[ns, UTC]\n", + " 22 RTPA 95900 non-null object \n", + " 23 _merge 95900 non-null category \n", + " 24 previous_y_m_upt 51915 non-null float64 \n", + " 25 change_1yr 48466 non-null float64 \n", + " 26 pct_change_1yr 48439 non-null float64 \n", + " 27 Mode_full 95900 non-null object \n", + " 28 TOS_full 95900 non-null object \n", + "dtypes: category(1), datetime64[ns, UTC](1), float64(7), int64(2), object(18)\n", + "memory usage: 21.3+ 
MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "904cf3b9-f2db-4b24-a8e2-bf93fdaf4966", + "metadata": {}, + "outputs": [], + "source": [ + "def save_rtpa_outputs(\n", + " df: pd.DataFrame, \n", + " year: int, \n", + " month: str,\n", + " upload_to_public: bool = False\n", + "):\n", + " \"\"\"\n", + " Export an excel for each RTPA, adds a READ ME tab, then writes into a folder.\n", + " Zip that folder. \n", + " Upload zipped file to GCS.\n", + " \"\"\"\n", + " col_dict ={\n", + " 'Uace Cd': \"UACE Code\",\n", + " 'Dt': \"Date\",\n", + " 'Ntd Id': \"NTD ID\",\n", + " 'Tos': \"Type of Service\",\n", + " 'Legacy Ntd Id': \"Legacy NTD ID\",\n", + " 'Upt': \"UPT\",\n", + " 'Vrm': \"VRM\",\n", + " 'Vrh': \"VRH\",\n", + " 'Voms': \"VOMS\",\n", + " 'Rtpa': \"RTPA\",\n", + " 'Previous Y M Upt': \"Previous Year/Month UPT\",\n", + " 'Change 1Yr': \"Change in 1 Year UPT\",\n", + " 'Pct Change 1Yr': \"Percent Change in 1 Year UPT\",\n", + " 'Tos Full': \"Type of Service Full Name\"\n", + "}\n", + " print(\"creating individual RTPA excel files\")\n", + " \n", + " for i in df[\"RTPA\"].unique():\n", + " \n", + " print(f\"creating excel file for: {i}\")\n", + " \n", + " # Filename should be snakecase\n", + " rtpa_snakecase = i.replace(' ', '_').lower()\n", + " \n", + " #insertng readme cover sheet, \n", + " cover_sheet = pd.read_excel(\"./cover_sheet_template.xlsx\", index_col = \"**NTD Monthly Ridership by RTPA**\")\n", + " cover_sheet.to_excel(\n", + " f\"./{year}_{month}/{rtpa_snakecase}.xlsx\", sheet_name = \"README\")\n", + "\n", + " rtpa_data =( df[df[\"RTPA\"] == i]\n", + " .sort_values(\"ntd_id\")\n", + " #got error from excel not recognizing timezone, made list to include dropping \"execution_ts\" column\n", + " .drop(columns = [\"_merge\",\"execution_ts\"])\n", + " #cleaning column names\n", + " .rename(columns=lambda x: x.replace(\"_\",\" \").title().strip())\n", + " #rename columns\n", + " 
.rename(columns=col_dict)\n", + " )\n", + " #column lists for aggregations\n", + " agency_cols = [\"ntd_id\", \"agency\", \"RTPA\"]\n", + " mode_cols = [\"mode\", \"RTPA\"]\n", + " tos_cols = [\"tos\", \"RTPA\"]\n", + "\n", + " # Creating aggregations\n", + " by_agency_long = sum_by_group((df[df[\"RTPA\"] == i]), agency_cols) \n", + " by_mode_long = sum_by_group((df[df[\"RTPA\"] == i]), mode_cols)\n", + " by_tos_long = sum_by_group((df[df[\"RTPA\"] == i]), tos_cols)\n", + " \n", + " #writing pages to excel fil\n", + " with pd.ExcelWriter(f\"./{year}_{month}/{rtpa_snakecase}.xlsx\", mode =\"a\") as writer:\n", + " rtpa_data.to_excel(writer, sheet_name = \"RTPA Ridership Data\", index=False)\n", + " by_agency_long.to_excel(writer, sheet_name = \"Aggregated by Agency\", index=False)\n", + " by_mode_long.to_excel(writer, sheet_name = \"Aggregated by Mode\", index=False)\n", + " by_tos_long.to_excel(writer, sheet_name = \"Aggregated by TOS\", index=False)\n", + " \n", + " print(\"zipping all excel files\")\n", + " \n", + " shutil.make_archive(f\"./{year}_{month}\", \"zip\", f\"{year}_{month}\")\n", + " \n", + " print(\"Zipped folder\")\n", + " \n", + " fs.upload(\n", + " f\"./{year}_{month}.zip\", \n", + " f\"{GCS_FILE_PATH}{year}_{month}.zip\"\n", + " )\n", + " \n", + " if upload_to_public:\n", + " fs.upload(\n", + " f\"./{year}_{month}.zip\",\n", + " f\"{PUBLIC_GCS}ntd_monthly_ridership/{year}_{month}.zip\"\n", + " )\n", + " \n", + " print(\"Uploaded to GCS\")\n", + " \n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "2eafa973-34c7-4cd3-ba96-3af84ab81453", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "creating individual RTPA excel files\n", + "creating excel file for: San Joaquin Council of Governments\n", + "creating excel file for: Orange County Transportation Authority\n", + "creating excel file for: Transportation Agency for Monterey County\n", + "creating excel file for: 
Riverside County Transportation Commission\n", + "creating excel file for: Metropolitan Transportation Commission\n", + "creating excel file for: San Diego Association of Governments\n", + "creating excel file for: San Bernardino County Transportation Authority\n", + "creating excel file for: Los Angeles County Metropolitan Transportation Authority\n", + "creating excel file for: Placer County Transportation Planning Agency\n", + "creating excel file for: Kings County Association of Governments\n", + "creating excel file for: Stanislaus Council of Governments\n", + "creating excel file for: Kern Council of Governments\n", + "creating excel file for: Santa Cruz County Transportation Commission\n", + "creating excel file for: Sacramento Area Council of Governments\n", + "creating excel file for: Santa Barbara County Association of Governments\n", + "creating excel file for: Fresno County Council of Governments\n", + "creating excel file for: Ventura County Transportation Commission\n", + "creating excel file for: San Luis Obispo Council of Governments\n", + "creating excel file for: Tulare County Association of Governments\n", + "creating excel file for: Shasta Regional Transportation Agency\n", + "creating excel file for: Butte County Association of Governments\n", + "creating excel file for: Merced County Association of Governments\n", + "creating excel file for: Imperial County Transportation Commission\n", + "creating excel file for: El Dorado County Transportation Commission\n", + "creating excel file for: Tahoe Regional Planning Agency\n", + "zipping all excel files\n", + "Zipped folder\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'fs' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[47], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m 
os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mYEAR\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mMONTH\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m \u001b[43msave_rtpa_outputs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mYEAR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mMONTH\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mupload_to_public\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[46], line 74\u001b[0m, in \u001b[0;36msave_rtpa_outputs\u001b[0;34m(df, year, month, upload_to_public)\u001b[0m\n\u001b[1;32m 70\u001b[0m shutil\u001b[38;5;241m.\u001b[39mmake_archive(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mzip\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mZipped folder\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 74\u001b[0m \u001b[43mfs\u001b[49m\u001b[38;5;241m.\u001b[39mupload(\n\u001b[1;32m 75\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 76\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mGCS_FILE_PATH\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 77\u001b[0m )\n\u001b[1;32m 79\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m upload_to_public:\n\u001b[1;32m 80\u001b[0m fs\u001b[38;5;241m.\u001b[39mupload(\n\u001b[1;32m 81\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 82\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mPUBLIC_GCS\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124mntd_monthly_ridership/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 83\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'fs' is not defined" + ] + } + ], + "source": [ + "os.makedirs(f\"./{YEAR}_{MONTH}/\")\n", + "save_rtpa_outputs(df, YEAR, MONTH, upload_to_public = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "400de2ab-2704-4998-85f6-a8e886689ad3", + "metadata": {}, + 
"outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 639bd555186456f284cf4c11ee3bc7d85da38c5d Mon Sep 17 00:00:00 2001 From: csuyat-dot Date: Thu, 19 Dec 2024 21:43:42 +0000 Subject: [PATCH 17/19] moved draft function to main script --- ntd/explore_save_rtpa_outputs_update.ipynb | 2 +- ntd/monthly_ridership_by_rtpa.py | 50 ++++++++++++---------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/ntd/explore_save_rtpa_outputs_update.ipynb b/ntd/explore_save_rtpa_outputs_update.ipynb index ff8ed8aa0..a2a8cf423 100644 --- a/ntd/explore_save_rtpa_outputs_update.ipynb +++ b/ntd/explore_save_rtpa_outputs_update.ipynb @@ -168,7 +168,7 @@ " cover_sheet.to_excel(\n", " f\"./{year}_{month}/{rtpa_snakecase}.xlsx\", sheet_name = \"README\")\n", "\n", - " rtpa_data =( df[df[\"RTPA\"] == i]\n", + " rtpa_data =(df[df[\"RTPA\"] == i]\n", " .sort_values(\"ntd_id\")\n", " #got error from excel not recognizing timezone, made list to include dropping \"execution_ts\" column\n", " .drop(columns = [\"_merge\",\"execution_ts\"])\n", diff --git a/ntd/monthly_ridership_by_rtpa.py b/ntd/monthly_ridership_by_rtpa.py index 2998755ce..4fda7ba60 100644 --- a/ntd/monthly_ridership_by_rtpa.py +++ b/ntd/monthly_ridership_by_rtpa.py @@ -99,7 +99,7 @@ def save_rtpa_outputs( upload_to_public: bool = False ): """ - Export an excel for each RTPA, adds new tabs for: READ ME & agg by agency, tos and mode. then writes into a folder. + Export an excel for each RTPA, adds a READ ME tab, then writes into a folder. Zip that folder. Upload zipped file to GCS. 
""" @@ -119,46 +119,50 @@ def save_rtpa_outputs( 'Pct Change 1Yr': "Percent Change in 1 Year UPT", 'Tos Full': "Type of Service Full Name" } + print("creating individual RTPA excel files") for i in df["RTPA"].unique(): + + print(f"creating excel file for: {i}") + # Filename should be snakecase rtpa_snakecase = i.replace(' ', '_').lower() + + #insertng readme cover sheet, + cover_sheet = pd.read_excel("./cover_sheet_template.xlsx", index_col = "**NTD Monthly Ridership by RTPA**") + cover_sheet.to_excel( + f"./{year}_{month}/{rtpa_snakecase}.xlsx", sheet_name = "README") - (df[df["RTPA"] == i] + rtpa_data =(df[df["RTPA"] == i] .sort_values("ntd_id") #got error from excel not recognizing timezone, made list to include dropping "execution_ts" column - .drop(columns = [ - "_merge", - "execution_ts" - ]) + .drop(columns = ["_merge","execution_ts"]) #cleaning column names .rename(columns=lambda x: x.replace("_"," ").title().strip()) #rename columns .rename(columns=col_dict) - #updated to `to_excel`, added sheet_name - .to_excel( - f"./{year}_{month}/{rtpa_snakecase}.xlsx", sheet_name = "RTPA Ridership Data", - index = False) - - ) - #insertng readme cover sheet, - cover_sheet = pd.read_excel("./cover_sheet_template.xlsx", index_col = "NTD Monthly Ridership by RTPA") - + ) + #column lists for aggregations agency_cols = ["ntd_id", "agency", "RTPA"] mode_cols = ["mode", "RTPA"] tos_cols = ["tos", "RTPA"] - by_agency_long = sum_by_group(df, agency_cols) - by_mode_long = sum_by_group(df, mode_cols) - by_tos_long = sum_by_group(df, tos_cols) + # Creating aggregations + by_agency_long = sum_by_group((df[df["RTPA"] == i]), agency_cols) + by_mode_long = sum_by_group((df[df["RTPA"] == i]), mode_cols) + by_tos_long = sum_by_group((df[df["RTPA"] == i]), tos_cols) + #writing pages to excel fil with pd.ExcelWriter(f"./{year}_{month}/{rtpa_snakecase}.xlsx", mode ="a") as writer: - cover_sheet.to_excel(writer, sheet_name = "READ ME") - by_agency_long.to_excel(writer, sheet_name = 
"Aggregated by Agency") - by_mode_long.to_excel(writer, sheet_name = "Aggregated by Mode") - by_tos_long.to_excel(writer, sheet_name = "Aggregated by TOS") - + rtpa_data.to_excel(writer, sheet_name = "RTPA Ridership Data", index=False) + by_agency_long.to_excel(writer, sheet_name = "Aggregated by Agency", index=False) + by_mode_long.to_excel(writer, sheet_name = "Aggregated by Mode", index=False) + by_tos_long.to_excel(writer, sheet_name = "Aggregated by TOS", index=False) + + print("zipping all excel files") + shutil.make_archive(f"./{year}_{month}", "zip", f"{year}_{month}") + print("Zipped folder") fs.upload( From d9b2094915a4fe4932c126f1fed1fa200cde615c Mon Sep 17 00:00:00 2001 From: csuyat-dot Date: Thu, 19 Dec 2024 23:00:40 +0000 Subject: [PATCH 18/19] updated readme to follow the template readme in the portfolio directory --- ntd/README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/ntd/README.md b/ntd/README.md index 873d76444..cca2300da 100644 --- a/ntd/README.md +++ b/ntd/README.md @@ -1,10 +1,57 @@ -# NTD Monthly Ridership by RTPA +# Monthly NTD Ridership by RTPA -Provide CalSTA with NTD Monthly Ridership by each regional transportation planning authority (RTPA). +Provide CalSTA with NTD Monthly Ridership by each RTPA. + +Per the [SB125 Final Guildelines](https://calsta.ca.gov/-/media/calsta-media/documents/sb125-final-guidelines-a11y.pdf) +>Caltrans will provide all RTPAs with a summary report each month that meets the requirements of this statutory provision, drawn from the data reported to the National Transit Database. The data will be drawn from the NTD at: [Complete Monthly Ridership (with adjustments and estimates) | FTA (dot.gov)](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release). 
RTPAs are required to post a link to this report and data in a manner easily accessed by the public, so that ridership trends within their region can be easily reviewed. This report shows general ridership trends by transit agency, mode, and type of service. Reported unlinked passenger trips are reported, as well as the change from the prior year. For example, July 2023's change would be the change in July 2023's reported values against July 2022's reported values. 
 
-## Datasets
-1. NTD monthly data: https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release.
-2. [RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true)
-3. Download our processed full data [here](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis).
\ No newline at end of file
+## Definitions
+- **FTA**: Federal Transit Administration.
+- **NTD**: National Transit Database. A reporting system that collects public transportation financial and operating information.
+- **RTPA**: Regional Transportation Planning Authority.
+- **UZA**: Urbanized Areas. An urbanized area is an incorporated area with a population of 50,000 or more that is designated as such by the U.S. Department of Commerce, Bureau of the Census.
+- **MODE**: A system for carrying transit passengers described by specific right-of-way (ROW), technology and operational features. Examples: Bus, Cable Car, Light Rail.
+- **TOS**: Describes how public transportation services are provided by the transit agency: directly operated (DO) or purchased transportation (PT) services.
+
+## Methodology
+Ridership data is ingested via the `FTA Complete Monthly Ridership` report, per the SB125 guidelines. Then filtered for agencies residing in California UZAs. These California Agencies are grouped by RTPAs, then aggregated by agencies, mode and TOS. 
The processed data for each RTPA is saved to a public repository, see datasets below. 
+
+
+## Frequently Asked Questions
+**Q: Which agencies/transit operators are in this report? Why are some agencies missing from an RTPA?**
+
+Per the [NTD Complete Monthly Ridership Report](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release) webpage:
+>File Summary: Contains monthly-updated service information reported by urban Full Reporters.
+
+Urban full reporters, that submit monthly ridership data to NTD, are included in this report. This report tracks data from 2018 to present. If an agency is not a monthly reporter, or has not reported data since 2018, they will not appear in the report.
+
+
+**Q: Where can I download my RTPA's data?**
+
+Data from this report can be downloaded from the Cal-ITP public data repository, see `Fully Processed Data Download` below. A Google Account is required to access the repository. Once logged in, navigate to `ntd_monthly_ridership`, click the year-month you want to download, then click `download`.
+
+The data is a zipped folder of all RTPA data for the year-month.
+
+
+**Q: How can my RTPA/Agency meet the requirements of the SB125 Guidelines regarding how "to make publicly available a summary of ridership data"**
+
+Per the [SB125 Final Guidelines](https://calsta.ca.gov/-/media/calsta-media/documents/sb125-final-guidelines-a11y.pdf):
+>RTPAs are required to post a link to this report and data in a manner easily accessed by the public, so that ridership trends within their region can be easily reviewed
+
+Hyperlinking this report on your RTPA's/Agency's webpage is a common method of meeting this requirement. 
+ +## Datasets / Data Sources +- [NTD Complete Monthly Ridership Report](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release) +- [California RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true) +- [Fully Processed Data Download](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis) + + + +## Who We Are +This website was created by the [California Department of Transportation](https://dot.ca.gov/)'s Division of Data and Digital Services. We are a group of data analysts and scientists who analyze transportation data, such as General Transit Feed Specification (GTFS) data, or data from funding programs such as the Active Transportation Program. Our goal is to transform messy and indecipherable original datasets into usable, customer-friendly products to better the transportation landscape. For more of our work, visit our [portfolio](https://analysis.calitp.org/). + +Alt text Alt text + +
Caltrans®, the California Department of Transportation® and the Caltrans logo are registered service marks of the California Department of Transportation and may not be copied, distributed, displayed, reproduced or transmitted in any form without prior written permission from the California Department of Transportation. \ No newline at end of file From 9672baac18ba4a2196386d1418f6b9d8c64fcd0e Mon Sep 17 00:00:00 2001 From: csuyat-dot Date: Thu, 19 Dec 2024 23:44:13 +0000 Subject: [PATCH 19/19] redeployed NBs and site. no errors --- portfolio/ntd_monthly_ridership/README.md | 59 +++++++++++++++++-- ...ine-county-transportation-commission.ipynb | 4 +- ...te-county-association-of-governments.ipynb | 4 +- ...ado-county-transportation-commission.ipynb | 4 +- ...fresno-county-council-of-governments.ipynb | 4 +- ...ial-county-transportation-commission.ipynb | 2 +- ...rt__rtpa_kern-council-of-governments.ipynb | 2 +- ...gs-county-association-of-governments.ipynb | 4 +- ...etropolitan-transportation-authority.ipynb | 4 +- ...ed-county-association-of-governments.ipynb | 4 +- ...tropolitan-transportation-commission.ipynb | 4 +- ...ange-county-transportation-authority.ipynb | 4 +- ...ounty-transportation-planning-agency.ipynb | 4 +- ...ide-county-transportation-commission.ipynb | 4 +- ...cramento-area-council-of-governments.ipynb | 4 +- ...dino-county-transportation-authority.ipynb | 4 +- ...san-diego-association-of-governments.ipynb | 4 +- ...a_san-joaquin-council-of-governments.ipynb | 4 +- ...n-luis-obispo-council-of-governments.ipynb | 2 +- ...ra-county-association-of-governments.ipynb | 4 +- ...ruz-county-transportation-commission.ipynb | 4 +- ...hasta-regional-transportation-agency.ipynb | 4 +- ...pa_stanislaus-council-of-governments.ipynb | 4 +- ..._rtpa_tahoe-regional-planning-agency.ipynb | 4 +- ...portation-agency-for-monterey-county.ipynb | 4 +- ...re-county-association-of-governments.ipynb | 4 +- ...ura-county-transportation-commission.ipynb | 4 +- 27 files 
changed, 102 insertions(+), 55 deletions(-) diff --git a/portfolio/ntd_monthly_ridership/README.md b/portfolio/ntd_monthly_ridership/README.md index 873d76444..cca2300da 100644 --- a/portfolio/ntd_monthly_ridership/README.md +++ b/portfolio/ntd_monthly_ridership/README.md @@ -1,10 +1,57 @@ -# NTD Monthly Ridership by RTPA +# Monthly NTD Ridership by RTPA -Provide CalSTA with NTD Monthly Ridership by each regional transportation planning authority (RTPA). +Provide CalSTA with NTD Monthly Ridership by each RTPA. + +Per the [SB125 Final Guildelines](https://calsta.ca.gov/-/media/calsta-media/documents/sb125-final-guidelines-a11y.pdf) +>Caltrans will provide all RTPAs with a summary report each month that meets the requirements of this statutory provision, drawn from the data reported to the National Transit Database. The data will be drawn from the NTD at: [Complete Monthly Ridership (with adjustments and estimates) | FTA (dot.gov)](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release). RTPAs are required to post a link to this report and data in a manner easily accessed by the public, so that ridership trends within their region can be easily reviewed. This report shows general ridership trends by transit agency, mode, and type of service. Reported unlinked passenger trips are reported, as well as the change from the prior year. For example, July 2023's change would be the change in July 2023's reported values against July 2022's reported values. -## Datasets -1. NTD monthly data: https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release. -2. [RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true) -3. Download our processed full data [here](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis). \ No newline at end of file +## Definitions +- **FTA**: Federal Transit Admisistration. 
+- **NTD**: National Transit Database. A reporting system that collects public transportation financial and operating information.
+- **RTPA**: Regional Transportation Planning Authority.
+- **UZA**: Urbanized Areas. An urbanized area is an incorporated area with a population of 50,000 or more that is designated as such by the U.S. Department of Commerce, Bureau of the Census.
+- **MODE**: A system for carrying transit passengers described by specific right-of-way (ROW), technology and operational features. Examples: Bus, Cable Car, Light Rail.
+- **TOS**: Describes how public transportation services are provided by the transit agency: directly operated (DO) or purchased transportation (PT) services.
+
+## Methodology
+Ridership data is ingested via the `FTA Complete Monthly Ridership` report, per the SB125 guidelines. Then filtered for agencies residing in California UZAs. These California Agencies are grouped by RTPAs, then aggregated by agencies, mode and TOS. The processed data for each RTPA is saved to a public repository, see datasets below.
+
+
+## Frequently Asked Questions
+**Q: Which agencies/transit operators are in this report? Why are some agencies missing from an RTPA?**
+
+Per the [NTD Complete Monthly Ridership Report](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release) webpage:
+>File Summary: Contains monthly-updated service information reported by urban Full Reporters.
+
+Urban full reporters, that submit monthly ridership data to NTD, are included in this report. This report tracks data from 2018 to present. If an agency is not a monthly reporter, or has not reported data since 2018, they will not appear in the report.
+
+
+**Q: Where can I download my RTPA's data?**
+
+Data from this report can be downloaded from the Cal-ITP public data repository, see `Fully Processed Data Download` below. A Google Account is required to access the repository. 
Once logged in, navigate to `ntd_monthly_ridership`, click the year-month you want to download, then click `download`. 
+
+The data is a zipped folder of all RTPA data for the year-month.
+
+
+**Q: How can my RTPA/Agency meet the requirements of the SB125 Guidelines regarding how "to make publicly available a summary of ridership data"**
+
+Per the [SB125 Final Guidelines](https://calsta.ca.gov/-/media/calsta-media/documents/sb125-final-guidelines-a11y.pdf):
+>RTPAs are required to post a link to this report and data in a manner easily accessed by the public, so that ridership trends within their region can be easily reviewed
+
+Hyperlinking this report on your RTPA's/Agency's webpage is a common method of meeting this requirement. 
+
+## Datasets / Data Sources
+- [NTD Complete Monthly Ridership Report](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release)
+- [California RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true)
+- [Fully Processed Data Download](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis)
+
+
+
+## Who We Are
+This website was created by the [California Department of Transportation](https://dot.ca.gov/)'s Division of Data and Digital Services. We are a group of data analysts and scientists who analyze transportation data, such as General Transit Feed Specification (GTFS) data, or data from funding programs such as the Active Transportation Program. Our goal is to transform messy and indecipherable original datasets into usable, customer-friendly products to better the transportation landscape. For more of our work, visit our [portfolio](https://analysis.calitp.org/).
+
+Alt text Alt text
+
+
Caltrans®, the California Department of Transportation® and the Caltrans logo are registered service marks of the California Department of Transportation and may not be copied, distributed, displayed, reproduced or transmitted in any form without prior written permission from the California Department of Transportation. \ No newline at end of file diff --git a/portfolio/ntd_monthly_ridership/rtpa_alpine-county-transportation-commission/00__monthly_ridership_report__rtpa_alpine-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_alpine-county-transportation-commission/00__monthly_ridership_report__rtpa_alpine-county-transportation-commission.ipynb index 2b4de639b..a6f664169 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_alpine-county-transportation-commission/00__monthly_ridership_report__rtpa_alpine-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_alpine-county-transportation-commission/00__monthly_ridership_report__rtpa_alpine-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a276d75088f0b1f3a8c57564d0d8da6f1b73f5597b6bb6477bc2b6daea96d9d -size 285768 +oid sha256:8175c7ae1a1266faa3cbb4694d1b3c3359b4b96f020b490ad20de7a4366f541d +size 55296 diff --git a/portfolio/ntd_monthly_ridership/rtpa_butte-county-association-of-governments/00__monthly_ridership_report__rtpa_butte-county-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_butte-county-association-of-governments/00__monthly_ridership_report__rtpa_butte-county-association-of-governments.ipynb index a76c97328..fd30829e2 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_butte-county-association-of-governments/00__monthly_ridership_report__rtpa_butte-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_butte-county-association-of-governments/00__monthly_ridership_report__rtpa_butte-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:c64de10d540d6424a865b05a483d3e43a664219850ac6afe5c0f86e59fc0985c -size 242916 +oid sha256:4ef7ce7e8441cf35982421340594c5b039b7ae13185e3fb82ce6db49927de349 +size 242920 diff --git a/portfolio/ntd_monthly_ridership/rtpa_el-dorado-county-transportation-commission/00__monthly_ridership_report__rtpa_el-dorado-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_el-dorado-county-transportation-commission/00__monthly_ridership_report__rtpa_el-dorado-county-transportation-commission.ipynb index ce029e99b..5993b9591 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_el-dorado-county-transportation-commission/00__monthly_ridership_report__rtpa_el-dorado-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_el-dorado-county-transportation-commission/00__monthly_ridership_report__rtpa_el-dorado-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:997edb1a71b96bccf3f865ac2f530984c6859fb9e68576d6f499e8d6b6bced9a -size 172082 +oid sha256:125c31195abd45a8bae0e898a1549036fd1dc1ac5ba15d749bade6180580e87e +size 172080 diff --git a/portfolio/ntd_monthly_ridership/rtpa_fresno-county-council-of-governments/00__monthly_ridership_report__rtpa_fresno-county-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_fresno-county-council-of-governments/00__monthly_ridership_report__rtpa_fresno-county-council-of-governments.ipynb index 7b1a7e25d..a79e127fe 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_fresno-county-council-of-governments/00__monthly_ridership_report__rtpa_fresno-county-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_fresno-county-council-of-governments/00__monthly_ridership_report__rtpa_fresno-county-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d04a2d51ef33edd60ef8c2a171fb27fab896570169bc61094fbb1e89f36820e -size 289290 +oid 
sha256:b442779c799f869997a29d96ea59746a5ed9d6fc2c109e2ffbc158b84724a390 +size 289289 diff --git a/portfolio/ntd_monthly_ridership/rtpa_imperial-county-transportation-commission/00__monthly_ridership_report__rtpa_imperial-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_imperial-county-transportation-commission/00__monthly_ridership_report__rtpa_imperial-county-transportation-commission.ipynb index f36043669..46a66ea4c 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_imperial-county-transportation-commission/00__monthly_ridership_report__rtpa_imperial-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_imperial-county-transportation-commission/00__monthly_ridership_report__rtpa_imperial-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c67d2641700055fd75f1e84bdf3e5e086e5f714aa35444f82105b3bddabb184a +oid sha256:22c5c8c15f35b51bb9da84a3dfa5d14ddf1ab84b4f7166de83309f1d228b39d5 size 243104 diff --git a/portfolio/ntd_monthly_ridership/rtpa_kern-council-of-governments/00__monthly_ridership_report__rtpa_kern-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_kern-council-of-governments/00__monthly_ridership_report__rtpa_kern-council-of-governments.ipynb index 460b889a7..3afecec53 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_kern-council-of-governments/00__monthly_ridership_report__rtpa_kern-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_kern-council-of-governments/00__monthly_ridership_report__rtpa_kern-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8469d0c4ed4738b572f638df6a8e614e9b12c9d7dd881712d3e6e217d4ba9252 +oid sha256:3d19afd5aff65868aeb07b883c2dddcbbe6fec936b56dc1484157f5eb0987634 size 267362 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_kings-county-association-of-governments/00__monthly_ridership_report__rtpa_kings-county-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_kings-county-association-of-governments/00__monthly_ridership_report__rtpa_kings-county-association-of-governments.ipynb index e5f56c41b..5e7aa1817 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_kings-county-association-of-governments/00__monthly_ridership_report__rtpa_kings-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_kings-county-association-of-governments/00__monthly_ridership_report__rtpa_kings-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad663579ae80c120a055b8ceab62d7efed22e41f56e630a520edbd03a58968c6 -size 382975 +oid sha256:4ded7acfc9b0adfa8e7cea83637a38a84419a359c0df7dc72e44d123a4f999ba +size 382972 diff --git a/portfolio/ntd_monthly_ridership/rtpa_los-angeles-county-metropolitan-transportation-authority/00__monthly_ridership_report__rtpa_los-angeles-county-metropolitan-transportation-authority.ipynb b/portfolio/ntd_monthly_ridership/rtpa_los-angeles-county-metropolitan-transportation-authority/00__monthly_ridership_report__rtpa_los-angeles-county-metropolitan-transportation-authority.ipynb index d70bb1399..5cb66d9f7 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_los-angeles-county-metropolitan-transportation-authority/00__monthly_ridership_report__rtpa_los-angeles-county-metropolitan-transportation-authority.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_los-angeles-county-metropolitan-transportation-authority/00__monthly_ridership_report__rtpa_los-angeles-county-metropolitan-transportation-authority.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2293626268ed34b7057941856c7f40e7d5efcab44c95fd9ba1d44e2b8b26cfce -size 1703520 +oid sha256:0ac81fb9ffd69c04e78506cdb7e5eb233e8025078e8767be88fe031cd26fcfe7 +size 1703518 diff 
--git a/portfolio/ntd_monthly_ridership/rtpa_merced-county-association-of-governments/00__monthly_ridership_report__rtpa_merced-county-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_merced-county-association-of-governments/00__monthly_ridership_report__rtpa_merced-county-association-of-governments.ipynb index c2d9c4ea4..d76ced5d0 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_merced-county-association-of-governments/00__monthly_ridership_report__rtpa_merced-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_merced-county-association-of-governments/00__monthly_ridership_report__rtpa_merced-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ecb49c3bb8c480cad496c9fbfe64927c555a64a04017a1dbc00a470b2ce1e20 -size 266622 +oid sha256:80ba63a49c053301fce4678db22f3fa6610215eaa3c038855315af00065ad018 +size 266623 diff --git a/portfolio/ntd_monthly_ridership/rtpa_metropolitan-transportation-commission/00__monthly_ridership_report__rtpa_metropolitan-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_metropolitan-transportation-commission/00__monthly_ridership_report__rtpa_metropolitan-transportation-commission.ipynb index bdea3fc3a..11a59df9d 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_metropolitan-transportation-commission/00__monthly_ridership_report__rtpa_metropolitan-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_metropolitan-transportation-commission/00__monthly_ridership_report__rtpa_metropolitan-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1eea1099aaddb36720b4035df4e1044bd55da4e6da0959d3d4de95d86ff4efa6 -size 1876738 +oid sha256:f08909c7433739c7b25bc68b14738bdfffacf1164ce6e46306943652f4151dbc +size 1928922 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_orange-county-transportation-authority/00__monthly_ridership_report__rtpa_orange-county-transportation-authority.ipynb b/portfolio/ntd_monthly_ridership/rtpa_orange-county-transportation-authority/00__monthly_ridership_report__rtpa_orange-county-transportation-authority.ipynb index 866cb6e6c..552f021c1 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_orange-county-transportation-authority/00__monthly_ridership_report__rtpa_orange-county-transportation-authority.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_orange-county-transportation-authority/00__monthly_ridership_report__rtpa_orange-county-transportation-authority.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15c2b5782aeb17381411bf05925e3f0d05acce5e981ef54e7e164c3e226e96fd -size 471566 +oid sha256:4e1b96d3cdee2df61b841e4ead1e547ba304dde585331941db52c1ffd6de73e3 +size 471565 diff --git a/portfolio/ntd_monthly_ridership/rtpa_placer-county-transportation-planning-agency/00__monthly_ridership_report__rtpa_placer-county-transportation-planning-agency.ipynb b/portfolio/ntd_monthly_ridership/rtpa_placer-county-transportation-planning-agency/00__monthly_ridership_report__rtpa_placer-county-transportation-planning-agency.ipynb index 68e79c1f1..b5ddfbed1 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_placer-county-transportation-planning-agency/00__monthly_ridership_report__rtpa_placer-county-transportation-planning-agency.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_placer-county-transportation-planning-agency/00__monthly_ridership_report__rtpa_placer-county-transportation-planning-agency.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:325117e4d6203a1aff736e37b29b8aa6d646abedce2bca61220cf461f11d5a99 -size 397570 +oid sha256:99ee7d6b604390ba6612e474fdea68a5fe5b3b02a11ba5af143a4410c40253a6 +size 397572 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_riverside-county-transportation-commission/00__monthly_ridership_report__rtpa_riverside-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_riverside-county-transportation-commission/00__monthly_ridership_report__rtpa_riverside-county-transportation-commission.ipynb index a86a2a111..0396abc73 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_riverside-county-transportation-commission/00__monthly_ridership_report__rtpa_riverside-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_riverside-county-transportation-commission/00__monthly_ridership_report__rtpa_riverside-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab177dfee04294fa76f87e2825d6395f0fe792a343ad798ca5e55aa13489c050 -size 559223 +oid sha256:94e3e2b4b1c31edb03e18c286e7398279cfa12e7e4f326594c614fbd8625b334 +size 559224 diff --git a/portfolio/ntd_monthly_ridership/rtpa_sacramento-area-council-of-governments/00__monthly_ridership_report__rtpa_sacramento-area-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_sacramento-area-council-of-governments/00__monthly_ridership_report__rtpa_sacramento-area-council-of-governments.ipynb index 653fc6522..9acbaea84 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_sacramento-area-council-of-governments/00__monthly_ridership_report__rtpa_sacramento-area-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_sacramento-area-council-of-governments/00__monthly_ridership_report__rtpa_sacramento-area-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:430fae617c35b2e10aa026984352780668cd51bfe7b3bf5953c5c176da7d6512 -size 643895 +oid sha256:f523f790b54d2f0eb95136378e5f17e3e0350cccbe41155a612ca2ebb881270d +size 643894 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_san-bernardino-county-transportation-authority/00__monthly_ridership_report__rtpa_san-bernardino-county-transportation-authority.ipynb b/portfolio/ntd_monthly_ridership/rtpa_san-bernardino-county-transportation-authority/00__monthly_ridership_report__rtpa_san-bernardino-county-transportation-authority.ipynb index e26214f63..a5ea0bb3f 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_san-bernardino-county-transportation-authority/00__monthly_ridership_report__rtpa_san-bernardino-county-transportation-authority.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_san-bernardino-county-transportation-authority/00__monthly_ridership_report__rtpa_san-bernardino-county-transportation-authority.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68da547505174438aa25d03ab7709e3dcc624763246c9de49753cf480633ddac -size 481470 +oid sha256:aa8402e33d377313339ef39123f817064f6727c5917dc917f6db6abbf3eaed29 +size 481473 diff --git a/portfolio/ntd_monthly_ridership/rtpa_san-diego-association-of-governments/00__monthly_ridership_report__rtpa_san-diego-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_san-diego-association-of-governments/00__monthly_ridership_report__rtpa_san-diego-association-of-governments.ipynb index 8700f4413..5a979d937 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_san-diego-association-of-governments/00__monthly_ridership_report__rtpa_san-diego-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_san-diego-association-of-governments/00__monthly_ridership_report__rtpa_san-diego-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed1ed101ac668d27c68aa924eb2e893883ea8fbad59366dd0c78f9deb135c7a5 -size 651290 +oid sha256:37e0c8f69a928bb92f8bef5c8e7a1e31d8f71934e40866ece86845717b03ef88 +size 651287 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_san-joaquin-council-of-governments/00__monthly_ridership_report__rtpa_san-joaquin-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_san-joaquin-council-of-governments/00__monthly_ridership_report__rtpa_san-joaquin-council-of-governments.ipynb index 98a7ecedd..880d178e6 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_san-joaquin-council-of-governments/00__monthly_ridership_report__rtpa_san-joaquin-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_san-joaquin-council-of-governments/00__monthly_ridership_report__rtpa_san-joaquin-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3cf105fa7eea1644e9d0e8f7d6869eccf5d698394b333a8d8a7dc0154ff995b2 -size 547511 +oid sha256:e4d4ad2a2a36da5b7bd8919512aad52fc59568616c24b333b9005962fbcc686d +size 547513 diff --git a/portfolio/ntd_monthly_ridership/rtpa_san-luis-obispo-council-of-governments/00__monthly_ridership_report__rtpa_san-luis-obispo-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_san-luis-obispo-council-of-governments/00__monthly_ridership_report__rtpa_san-luis-obispo-council-of-governments.ipynb index bcaefe292..7dfd3bbb0 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_san-luis-obispo-council-of-governments/00__monthly_ridership_report__rtpa_san-luis-obispo-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_san-luis-obispo-council-of-governments/00__monthly_ridership_report__rtpa_san-luis-obispo-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19f18edc26b6f102f4d7a0476e9de5cfe5cef76f029c52e4abcac4c6c3ca7d49 +oid sha256:4cdbf6a4db5499ea74be91504bf7e4dce2a15d7ee56099ef75e2bb1e81acbe1a size 405964 diff --git a/portfolio/ntd_monthly_ridership/rtpa_santa-barbara-county-association-of-governments/00__monthly_ridership_report__rtpa_santa-barbara-county-association-of-governments.ipynb 
b/portfolio/ntd_monthly_ridership/rtpa_santa-barbara-county-association-of-governments/00__monthly_ridership_report__rtpa_santa-barbara-county-association-of-governments.ipynb index acc9a5999..0f24151dc 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_santa-barbara-county-association-of-governments/00__monthly_ridership_report__rtpa_santa-barbara-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_santa-barbara-county-association-of-governments/00__monthly_ridership_report__rtpa_santa-barbara-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3626a477ac7ed8f97c3da99e0abc7d329c1812700b34f6cb8123d935186850b8 -size 286678 +oid sha256:0b6e033cbb7178156a8a5b8cbf853a9861cbb8f45be22cb705e1980bce6ad1b1 +size 286675 diff --git a/portfolio/ntd_monthly_ridership/rtpa_santa-cruz-county-transportation-commission/00__monthly_ridership_report__rtpa_santa-cruz-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_santa-cruz-county-transportation-commission/00__monthly_ridership_report__rtpa_santa-cruz-county-transportation-commission.ipynb index 953124ea9..efbdaa020 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_santa-cruz-county-transportation-commission/00__monthly_ridership_report__rtpa_santa-cruz-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_santa-cruz-county-transportation-commission/00__monthly_ridership_report__rtpa_santa-cruz-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28fac554d1b57006129e31c4ac876c0d311b29f96f553999eac9ff875c5d64ee -size 334857 +oid sha256:ed53648322da839e84ac46a9bb126c2e38c803eb0e77248592b986aa144bb862 +size 334856 diff --git a/portfolio/ntd_monthly_ridership/rtpa_shasta-regional-transportation-agency/00__monthly_ridership_report__rtpa_shasta-regional-transportation-agency.ipynb 
b/portfolio/ntd_monthly_ridership/rtpa_shasta-regional-transportation-agency/00__monthly_ridership_report__rtpa_shasta-regional-transportation-agency.ipynb index f7921cbff..98c08f817 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_shasta-regional-transportation-agency/00__monthly_ridership_report__rtpa_shasta-regional-transportation-agency.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_shasta-regional-transportation-agency/00__monthly_ridership_report__rtpa_shasta-regional-transportation-agency.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:708194f890f7db0be8f385766fce790dc1df1f69b625d5ba5f11e68eb19853be -size 218791 +oid sha256:1938be6a9e1c55d85a6e47c402a23bc8bec3ffc492b3f6049a131ae9423d2f3d +size 218790 diff --git a/portfolio/ntd_monthly_ridership/rtpa_stanislaus-council-of-governments/00__monthly_ridership_report__rtpa_stanislaus-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_stanislaus-council-of-governments/00__monthly_ridership_report__rtpa_stanislaus-council-of-governments.ipynb index aa2332d31..14224425c 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_stanislaus-council-of-governments/00__monthly_ridership_report__rtpa_stanislaus-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_stanislaus-council-of-governments/00__monthly_ridership_report__rtpa_stanislaus-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d416c7eee7306eb1860e79bf7b4fa57c95f4a0195818b4d13fcf05ac2e105c9 -size 386786 +oid sha256:d10c4e41d8b71b1bc83086883babaaf0edb375796decc8eb889ea1e22bbfe090 +size 386790 diff --git a/portfolio/ntd_monthly_ridership/rtpa_tahoe-regional-planning-agency/00__monthly_ridership_report__rtpa_tahoe-regional-planning-agency.ipynb b/portfolio/ntd_monthly_ridership/rtpa_tahoe-regional-planning-agency/00__monthly_ridership_report__rtpa_tahoe-regional-planning-agency.ipynb index c8aecb5e3..2fe183320 100644 --- 
a/portfolio/ntd_monthly_ridership/rtpa_tahoe-regional-planning-agency/00__monthly_ridership_report__rtpa_tahoe-regional-planning-agency.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_tahoe-regional-planning-agency/00__monthly_ridership_report__rtpa_tahoe-regional-planning-agency.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78a415dbe91e6c22723cbcbed9884667ef4bc9e9f953c800c356362f95389e54 -size 312772 +oid sha256:7d4c94166e07bd335e3cb91eded6c37b5e73689e091b2d4f6af7971c25d644b4 +size 312770 diff --git a/portfolio/ntd_monthly_ridership/rtpa_transportation-agency-for-monterey-county/00__monthly_ridership_report__rtpa_transportation-agency-for-monterey-county.ipynb b/portfolio/ntd_monthly_ridership/rtpa_transportation-agency-for-monterey-county/00__monthly_ridership_report__rtpa_transportation-agency-for-monterey-county.ipynb index 5f7b51604..b82f8ff40 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_transportation-agency-for-monterey-county/00__monthly_ridership_report__rtpa_transportation-agency-for-monterey-county.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_transportation-agency-for-monterey-county/00__monthly_ridership_report__rtpa_transportation-agency-for-monterey-county.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2167e17f9e1ea68463222a639c1f17c1f0f919c30a65ea959deb3494343f9df -size 369190 +oid sha256:d5a30323d87bbc043f1076395f201295364ca0b04d126c8a4b20f66a4d196488 +size 369189 diff --git a/portfolio/ntd_monthly_ridership/rtpa_tulare-county-association-of-governments/00__monthly_ridership_report__rtpa_tulare-county-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_tulare-county-association-of-governments/00__monthly_ridership_report__rtpa_tulare-county-association-of-governments.ipynb index e5f482332..208aaaf77 100644 --- 
a/portfolio/ntd_monthly_ridership/rtpa_tulare-county-association-of-governments/00__monthly_ridership_report__rtpa_tulare-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_tulare-county-association-of-governments/00__monthly_ridership_report__rtpa_tulare-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:243ab8491cb44017d0168405251d2b3ab336bd7298fcc4ab47c5572f3d9bbc93 -size 330267 +oid sha256:5628c9e6a539ef109bb0a825384a2869ab15a526451ac9b57c46b4e11afe6d30 +size 330266 diff --git a/portfolio/ntd_monthly_ridership/rtpa_ventura-county-transportation-commission/00__monthly_ridership_report__rtpa_ventura-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_ventura-county-transportation-commission/00__monthly_ridership_report__rtpa_ventura-county-transportation-commission.ipynb index 0a2acf1f0..73f87fef9 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_ventura-county-transportation-commission/00__monthly_ridership_report__rtpa_ventura-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_ventura-county-transportation-commission/00__monthly_ridership_report__rtpa_ventura-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f79e1488c4f4f1f20b59a6ee40043509d544b8b2d6509ad5bcd4c244c06c6cf -size 384881 +oid sha256:98677c4ed823c08f9a9b195ef12043bad5d813f1ec6050951e4d4cf97bce1ab7 +size 384880