From 3d95e3604e77b18777a4900214b44e91d1c3e49f Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:26:22 +0000 Subject: [PATCH 01/19] add dummy arrival time because gtfs-segments updated --- rt_segment_speeds/scripts/cut_stop_segments.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rt_segment_speeds/scripts/cut_stop_segments.py b/rt_segment_speeds/scripts/cut_stop_segments.py index a5ef2d2f6..8a5a94475 100644 --- a/rt_segment_speeds/scripts/cut_stop_segments.py +++ b/rt_segment_speeds/scripts/cut_stop_segments.py @@ -64,6 +64,12 @@ def stop_times_with_shape( subset="geometry" ).reset_index(drop=True).set_geometry("geometry") + # Add a dummy arrival_time that is needed in gtfs_segments that is not NaT + # or else it'll throw error in gtfs_segments.create_segments. Use zero instead. + df = df.assign( + arrival_time = 0 + ) + return df From a8796d38b0179316648b13e5adb41a01806101bf Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 23:49:45 +0000 Subject: [PATCH 02/19] (refactor): nearest 5 vp and filter to 2 --- .../scripts/new_nearest_vp_and_filter.py | 133 +++++++++++++ .../segment_speed_utils/neighbor.py | 174 +++++++++++++++++- 2 files changed, 305 insertions(+), 2 deletions(-) create mode 100644 rt_segment_speeds/scripts/new_nearest_vp_and_filter.py diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py new file mode 100644 index 000000000..004f7e29d --- /dev/null +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -0,0 +1,133 @@ +import dask_geopandas as dg +import dask.dataframe as dd +import datetime +import geopandas as gpd +import numpy as np +import pandas as pd +import shapely +import sys + +from loguru import logger + +from shared_utils import rt_dates +from segment_speed_utils import helpers, neighbor +from update_vars import SEGMENT_GCS, SHARED_GCS, GTFS_DATA_DICT +from segment_speed_utils.project_vars import PROJECT_CRS + + +def 
stop_times_for_shape_segments( + analysis_date: str, + dict_inputs: dict +) -> gpd.GeoDataFrame: + """ + This is the stop times table using only 1 shape for each + route-direction. Every trip belong to that shape + will be cut along the same stops. + This allows us to aggregate segments across trips because each + segment has the same stop_id1 and stop_id2. + """ + SEGMENT_FILE = dict_inputs["segments_file"] + + rt_trips = helpers.import_unique_vp_trips(analysis_date) + + shape_stop_combinations = pd.read_parquet( + f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet", + columns = ["trip_instance_key", + "stop_id1", "stop_pair", + "st_trip_instance_key"], + filters = [[ + #("schedule_gtfs_dataset_key", "==", "7cc0cb1871dfd558f11a2885c145d144"), + ("trip_instance_key", "in", rt_trips) + ]] + ).rename(columns = {"stop_id1": "stop_id"}) + + subset_trips = shape_stop_combinations.st_trip_instance_key.unique() + + stops_to_use = helpers.import_scheduled_stop_times( + analysis_date, + columns = ["trip_instance_key", "shape_array_key", + "stop_sequence", "stop_id", "stop_pair", + "stop_primary_direction", "geometry"], + filters = [[("trip_instance_key", "in", subset_trips)]], + get_pandas = True, + with_direction = True + ).rename(columns = {"trip_instance_key": "st_trip_instance_key"}) + + stop_times = pd.merge( + stops_to_use, + shape_stop_combinations, + on = ["st_trip_instance_key", "stop_id", "stop_pair"], + how = "inner" + ).drop( + columns = "st_trip_instance_key" + ).drop_duplicates().reset_index(drop=True) + + return stop_times + + +def new_nearest_neighbor_for_stop( + analysis_date: str, + segment_type = segment_type, + config_path = GTFS_DATA_DICT +): + """ + """ + start = datetime.datetime.now() + + dict_inputs = config_path[segment_type] + trip_stop_cols = [*dict_inputs["trip_stop_cols"]] + EXPORT_FILE = dict_inputs["stage2c"] + + stop_times = stop_times_for_shape_segments( + analysis_date, + dict_inputs + ) + + gdf = 
neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) + + vp_before, vp_after = np.vectorize( + neighbor.new_subset_arrays_to_valid_directions + )( + gdf.vp_primary_direction, + gdf.vp_geometry, + gdf.vp_idx, + gdf.stop_geometry, + gdf.stop_primary_direction, + gdf.shape_geometry, + gdf.stop_meters + ) + + gdf2 = gdf.assign( + before_vp_idx = vp_before, + after_vp_idx = vp_after, + )[trip_stop_cols + [ + "shape_array_key", "stop_meters", "before_vp_idx", "after_vp_idx"] + ] + + del gdf, stop_times + + gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet") + + end = datetime.datetime.now() + logger.info(f"nearest neighbor for {segment_type} " + f"{analysis_date}: {end - start}") + + return + + +''' +if __name__ == "__main__": + + from segment_speed_utils.project_vars import analysis_date_list + from dask import delayed, compute + + delayed_dfs = [ + delayed(new_nearest_neighbor_for_stop)( + analysis_date = analysis_date, + segment_type = segment_type, + config_path = GTFS_DATA_DICT + ) for analysis_date in analysis_date_list + ] + + [compute(i)[0] for i in delayed_dfs] +''' \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index ffa197f93..065da1330 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -8,7 +8,7 @@ from calitp_data_analysis.geography_utils import WGS84 from segment_speed_utils import gtfs_schedule_wrangling, vp_transform -from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT +from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, PROJECT_CRS from shared_utils import geo_utils @@ -56,6 +56,50 @@ def merge_stop_vp_for_nearest_neighbor( return gdf +def new_merge_stop_vp_for_nearest_neighbor( + stop_times: gpd.GeoDataFrame, + analysis_date: str, + **kwargs +): + VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line + + 
vp_condensed = gpd.read_parquet( + f"{SEGMENT_GCS}{VP_NN}_{analysis_date}.parquet", + columns = ["trip_instance_key", + "vp_idx", "vp_primary_direction", + "geometry"], + **kwargs + ).to_crs(WGS84) + + shapes = helpers.import_scheduled_shapes( + analysis_date, + columns = ["shape_array_key", "geometry"], + crs = PROJECT_CRS, + get_pandas = True, + filters = [[("shape_array_key", "in", stop_times.shape_array_key.tolist())]] + ).rename(columns = {"geometry": "shape_geometry"}) + + gdf = pd.merge( + stop_times.rename( + columns = {"geometry": "stop_geometry"} + ).set_geometry("stop_geometry").to_crs(PROJECT_CRS), + vp_condensed.to_crs(PROJECT_CRS).rename( + columns = {"geometry": "vp_geometry"}), + on = "trip_instance_key", + how = "inner" + ).merge( + shapes, + on = "shape_array_key", + how = "inner" + ) + + gdf = gdf.assign( + stop_meters = gdf.shape_geometry.project(gdf.stop_geometry) + ) + + return gdf + + def subset_arrays_to_valid_directions( vp_direction_array: np.ndarray, vp_geometry: shapely.LineString, @@ -126,4 +170,130 @@ def add_nearest_neighbor_result_array( nearest_vp_arr = nearest_vp_arr_series ).drop(columns = ["vp_primary_direction", "vp_idx", "vp_geometry"]) - return gdf2 \ No newline at end of file + return gdf2 + + + +def find_nearest_points( + vp_coords_line: np.ndarray, + target_stop: shapely.Point, + vp_idx_array: np.ndarray, +) -> np.ndarray: + """ + vp_coords_line is all the vehicle positions strung together as + coordinates in a linestring. + The target point is a stop. + + We want to find the k nearest points before/after a stop. + Start with k=5. + Returns an array that gives the indices that are the nearest k points + (ex: nearest 5 vp to each stop). 
+ """ + indices = geo_utils.nearest_snap( + vp_coords_line, + target_stop, + k_neighbors = 5 + ) + + # nearest neighbor returns self.N + # if there are no nearest neighbor results found + # if we want 10 nearest neighbors and 8th, 9th, 10th are all + # the same result, the 8th will have a result, then 9th and 10th will + # return the length of the array (which is out-of-bounds) + indices2 = indices[indices < vp_idx_array.size] + + return indices2 + + +def filter_to_nearest2_vp( + vp_coords_line: np.ndarray, + shape_geometry: shapely.LineString, + vp_idx_array: np.ndarray, + stop_meters: float, + indices_of_nearest: np.ndarray, +) -> tuple[np.ndarray]: + """ + Take the indices that are the nearest. + Filter the vp coords down and project those against the shape_geometry. + Calculate how close those nearest k vp are to a stop (as they travel along a shape). + + Filter down to the nearest 2 vp before and after a stop. + If there isn't one before or after, a value of -1 is returned. + """ + # Subset the array of vp coords and vp_idx_array with + # the indices that show the nearest k neighbors. 
+ nearest_vp = vp_coords_line[indices_of_nearest] + nearest_vp_idx = vp_idx_array[indices_of_nearest] + + # Project these vp coords to shape geometry and see how far it is + # from the stop's position on the shape + nearest_vp_projected = np.asarray( + [shape_geometry.project(shapely.Point(i)) - stop_meters + for i in nearest_vp] + ) + + # Negative values are before the stop + # Positive values are vp after the stop + before_indices = np.where(nearest_vp_projected < 0)[0] + after_indices = np.where(nearest_vp_projected > 0)[0] + + # Grab the closest vp before a stop (-1 means array was empty) + if before_indices.size > 0: + before = nearest_vp_idx[before_indices][-1] + else: + before = -1 + + # Grab the closest vp after a stop (-1 means array was empty) + if after_indices.size > 0: + after = nearest_vp_idx[after_indices][0] + else: + after = -1 + + return before, after + + +def new_subset_arrays_to_valid_directions( + vp_direction_array: np.ndarray, + vp_geometry: shapely.LineString, + vp_idx_array: np.ndarray, + stop_geometry: shapely.Point, + stop_direction: str, + shape_geometry: shapely.LineString, + stop_meters: float +) -> np.ndarray: + """ + Each row stores several arrays related to vp. + vp_direction is an array, vp_idx is an array, + and the linestring of vp coords can be coerced into an array. + + When we're doing nearest neighbor search, we want to + first filter the full array down to valid vp + before snapping it. 
+ """ + opposite_direction = vp_transform.OPPOSITE_DIRECTIONS[stop_direction] + + # These are the valid index values where opposite direction + # is excluded + valid_indices = (vp_direction_array != opposite_direction).nonzero() + + # These are vp coords where index values of opposite direction is excluded + valid_vp_coords_line = np.array(vp_geometry.coords)[valid_indices] + + # These are the subset of vp_idx values where opposite direction is excluded + valid_vp_idx_arr = np.asarray(vp_idx_array)[valid_indices] + + nearest_indices = find_nearest_points( + valid_vp_coords_line, + stop_geometry, + valid_vp_idx_arr, + ) + + before_vp, after_vp = filter_to_nearest2_vp( + valid_vp_coords_line, + shape_geometry, + valid_vp_idx_arr, + stop_meters, + nearest_indices, + ) + + return before_vp, after_vp \ No newline at end of file From 533c7392017d5ce061f5ab4407d7d11165633bb6 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 23:50:28 +0000 Subject: [PATCH 03/19] could stage2c replace stage2/stage2b outputs? 
--- _shared_utils/shared_utils/gtfs_analytics_data.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index 5391c75b8..96ffcb1b7 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -74,6 +74,7 @@ stop_segments: stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_shape_segments" stage2b: "nearest/nearest2_vp_shape_segments" + stage2c: "nearest/wide_nearest2_vp_shape_segments" stage3: "stop_arrivals" stage4: "speeds_stop_segments" trip_stop_cols: ["trip_instance_key", "stop_sequence"] From bc84b9ff8a5142c51235085186c5fb33ec3bca5c Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Sat, 14 Dec 2024 01:05:13 +0000 Subject: [PATCH 04/19] (refactor): add vp shape meters for nearest neighbor results --- .../scripts/new_nearest_vp_and_filter.py | 27 +++++++++++------ .../segment_speed_utils/neighbor.py | 30 +++++++++++-------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py index 004f7e29d..63c299718 100644 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -1,5 +1,3 @@ -import dask_geopandas as dg -import dask.dataframe as dd import datetime import geopandas as gpd import numpy as np @@ -36,7 +34,6 @@ def stop_times_for_shape_segments( "stop_id1", "stop_pair", "st_trip_instance_key"], filters = [[ - #("schedule_gtfs_dataset_key", "==", "7cc0cb1871dfd558f11a2885c145d144"), ("trip_instance_key", "in", rt_trips) ]] ).rename(columns = {"stop_id1": "stop_id"}) @@ -67,7 +64,7 @@ def stop_times_for_shape_segments( def new_nearest_neighbor_for_stop( analysis_date: str, - segment_type = segment_type, + segment_type: str, config_path = GTFS_DATA_DICT ): """ @@ -85,7 +82,7 @@ def 
new_nearest_neighbor_for_stop( gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - vp_before, vp_after = np.vectorize( + vp_before, vp_after, vp_before_m, vp_after_m = np.vectorize( neighbor.new_subset_arrays_to_valid_directions )( gdf.vp_primary_direction, @@ -100,8 +97,12 @@ def new_nearest_neighbor_for_stop( gdf2 = gdf.assign( before_vp_idx = vp_before, after_vp_idx = vp_after, + before_vp_meters = vp_before_m, + after_vp_meters = vp_after_m )[trip_stop_cols + [ - "shape_array_key", "stop_meters", "before_vp_idx", "after_vp_idx"] + "shape_array_key", "stop_meters", + "before_vp_idx", "after_vp_idx", + "before_vp_meters", "after_vp_meters"] ] del gdf, stop_times @@ -115,11 +116,20 @@ def new_nearest_neighbor_for_stop( return -''' + if __name__ == "__main__": - from segment_speed_utils.project_vars import analysis_date_list + #from segment_speed_utils.project_vars import analysis_date_list + from dask import delayed, compute + LOG_FILE = "../logs/test.log" + logger.add(LOG_FILE, retention="3 months") + logger.add(sys.stderr, + format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", + level="INFO") + + analysis_date_list = [rt_dates.DATES["sep2024"]] + segment_type = "stop_segments" delayed_dfs = [ delayed(new_nearest_neighbor_for_stop)( @@ -130,4 +140,3 @@ def new_nearest_neighbor_for_stop( ] [compute(i)[0] for i in delayed_dfs] -''' \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index 065da1330..f9c19ea2c 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -7,7 +7,7 @@ import shapely from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import gtfs_schedule_wrangling, vp_transform +from segment_speed_utils import gtfs_schedule_wrangling, helpers, vp_transform from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, 
PROJECT_CRS from shared_utils import geo_utils @@ -69,7 +69,7 @@ def new_merge_stop_vp_for_nearest_neighbor( "vp_idx", "vp_primary_direction", "geometry"], **kwargs - ).to_crs(WGS84) + ) shapes = helpers.import_scheduled_shapes( analysis_date, @@ -228,28 +228,32 @@ def filter_to_nearest2_vp( # Project these vp coords to shape geometry and see how far it is # from the stop's position on the shape nearest_vp_projected = np.asarray( - [shape_geometry.project(shapely.Point(i)) - stop_meters + [shape_geometry.project(shapely.Point(i)) for i in nearest_vp] ) # Negative values are before the stop # Positive values are vp after the stop - before_indices = np.where(nearest_vp_projected < 0)[0] - after_indices = np.where(nearest_vp_projected > 0)[0] + before_indices = np.where(nearest_vp_projected - stop_meters < 0)[0] + after_indices = np.where(nearest_vp_projected - stop_meters > 0)[0] # Grab the closest vp before a stop (-1 means array was empty) if before_indices.size > 0: - before = nearest_vp_idx[before_indices][-1] + before_idx = nearest_vp_idx[before_indices][-1] + before_vp_meters = nearest_vp_projected[before_indices][-1] else: - before = -1 + before_idx = -1 + before_vp_meters = 0 # Grab the closest vp after a stop (-1 means array was empty) if after_indices.size > 0: - after = nearest_vp_idx[after_indices][0] + after_idx = nearest_vp_idx[after_indices][0] + after_vp_meters = nearest_vp_projected[after_indices][0] else: - after = -1 + after_idx = -1 + after_vp_meters = 0 - return before, after + return before_idx, after_idx, before_vp_meters, after_vp_meters def new_subset_arrays_to_valid_directions( @@ -287,8 +291,8 @@ def new_subset_arrays_to_valid_directions( stop_geometry, valid_vp_idx_arr, ) - - before_vp, after_vp = filter_to_nearest2_vp( + + before_vp, after_vp, before_meters, after_meters = filter_to_nearest2_vp( valid_vp_coords_line, shape_geometry, valid_vp_idx_arr, @@ -296,4 +300,4 @@ def new_subset_arrays_to_valid_directions( nearest_indices, ) - 
return before_vp, after_vp \ No newline at end of file + return before_vp, after_vp, before_meters, after_meters \ No newline at end of file From 4a26425adffacd05f53dfc1af07b222eee139b67 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Tue, 17 Dec 2024 18:53:44 +0000 Subject: [PATCH 05/19] rename columns in nearest vp output --- .../scripts/new_nearest_vp_and_filter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py index 63c299718..9a9b21fff 100644 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -82,7 +82,7 @@ def new_nearest_neighbor_for_stop( gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - vp_before, vp_after, vp_before_m, vp_after_m = np.vectorize( + vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( neighbor.new_subset_arrays_to_valid_directions )( gdf.vp_primary_direction, @@ -95,14 +95,14 @@ def new_nearest_neighbor_for_stop( ) gdf2 = gdf.assign( - before_vp_idx = vp_before, - after_vp_idx = vp_after, - before_vp_meters = vp_before_m, - after_vp_meters = vp_after_m + prior_vp_idx = vp_before, + subseq_vp_idx = vp_after, + prior_vp_meters = vp_before_meters, + subseq_vp_meters = vp_after_meters )[trip_stop_cols + [ "shape_array_key", "stop_meters", - "before_vp_idx", "after_vp_idx", - "before_vp_meters", "after_vp_meters"] + "prior_vp_idx", "subseq_vp_idx", + "prior_vp_meters", "subseq_vp_meters"] ] del gdf, stop_times From 94496c2296f61a7f33ecb0ee88ce4fd182376f34 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Tue, 17 Dec 2024 18:54:24 +0000 Subject: [PATCH 06/19] (refactor): stop arrival interpolation is better set up now, remove intermediate steps --- .../scripts/interpolate_stop_arrival.py | 101 ++++++------------ 1 file changed, 30 insertions(+), 71 deletions(-) diff --git 
a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index a155b74c1..c8b5a50c5 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -1,4 +1,10 @@ """ +Interpolate stop arrival +based on where the nearest 2 vp are +when stop position is between the 2 vp. + +Stop and vp geometries should be projected along the shape geometry, +Use `stop_meters`, `prior_vp_meters`, `subseq_vp_meters`. """ import datetime import geopandas as gpd @@ -33,62 +39,6 @@ def get_vp_timestamps( return vp -def consolidate_surrounding_vp( - df: pd.DataFrame, - group_cols: list, -) -> pd.DataFrame: - """ - This reshapes the df to wide so that each stop position has - a prior and subseq timestamp (now called vp_timestamp_local). - """ - df = df.assign( - obs = (df.sort_values(group_cols + ["vp_idx"]) - .groupby(group_cols, - observed=True, group_keys=False, dropna=False) - .cumcount() - ) - ) - - group_cols2 = group_cols + ["stop_meters"] - prefix_cols = ["vp_idx", "shape_meters"] - timestamp_cols = ["location_timestamp_local", "moving_timestamp_local"] - # since shape_meters actually might be decreasing as time progresses, - # (bus moving back towards origin of shape) - # we don't actually know that the smaller shape_meters is the first timestamp - # nor the larger shape_meters is the second timestamp. - # all we know is that stop_meters (stop) falls between these 2 shape_meters. 
- # sort by timestamp, and set the order to be 0, 1 - vp_before_stop = df.loc[df.obs==0][group_cols2 + prefix_cols + timestamp_cols] - vp_after_stop = df.loc[df.obs==1][group_cols2 + prefix_cols + timestamp_cols] - - # For the vp before the stop occurs, we want the maximum timestamp - # of the last position - # We want to keep the moving_timestamp (which is after it's dwelled) - vp_before_stop = vp_before_stop.assign( - prior_vp_timestamp_local = vp_before_stop.moving_timestamp_local, - ).rename( - columns = {**{i: f"prior_{i}" for i in prefix_cols}} - ).drop(columns = timestamp_cols) - - # For the vp after the stop occurs, we want the minimum timestamp - # of that next position - # Keep location_timetamp (before it dwells) - vp_after_stop = vp_after_stop.assign( - subseq_vp_timestamp_local = vp_after_stop.location_timestamp_local, - ).rename( - columns = {**{i: f"subseq_{i}" for i in prefix_cols}} - ).drop(columns = timestamp_cols) - - df_wide = pd.merge( - vp_before_stop, - vp_after_stop, - on = group_cols2, - how = "inner" - ) - - return df_wide - - def add_arrival_time( nearest_vp_input_file: str, vp_timestamp_file: str, @@ -106,7 +56,11 @@ def add_arrival_time( f"{SEGMENT_GCS}{nearest_vp_input_file}_{analysis_date}.parquet" ) - subset_vp = vp_filtered.vp_idx.unique() + subset_vp = np.unique( + np.concatenate( + (vp_filtered.prior_vp_idx.unique(), + vp_filtered.subseq_vp_idx.unique()) + )).tolist() vp_timestamps = get_vp_timestamps( vp_timestamp_file, @@ -116,10 +70,14 @@ def add_arrival_time( df = pd.merge( vp_filtered, - vp_timestamps, - on = "vp_idx", + vp_timestamps.add_prefix("prior_"), + on = "prior_vp_idx", + how = "inner" + ).merge( + vp_timestamps.add_prefix("subseq_"), + on = "subseq_vp_idx", how = "inner" - ).pipe(consolidate_surrounding_vp, group_cols) + ) arrival_time_series = [] @@ -128,13 +86,13 @@ def add_arrival_time( stop_position = getattr(row, "stop_meters") projected_points = np.asarray([ - getattr(row, "prior_shape_meters"), - getattr(row, 
"subseq_shape_meters") + getattr(row, "prior_vp_meters"), + getattr(row, "subseq_vp_meters") ]) timestamp_arr = np.asarray([ - getattr(row, "prior_vp_timestamp_local"), - getattr(row, "subseq_vp_timestamp_local"), + getattr(row, "prior_moving_timestamp_local"), + getattr(row, "subseq_location_timestamp_local"), ]) @@ -215,12 +173,13 @@ def enforce_monotonicity_and_interpolate_across_stops( ) # Subset to trips that have at least 1 obs that violates monotonicity - trips_with_one_false = (df.groupby("trip_instance_key") - .agg({"arrival_time_sec_monotonic": "min"}) - .reset_index() - .query('arrival_time_sec_monotonic==0') - .trip_instance_key - ) + trips_with_one_false = ( + df.groupby("trip_instance_key") + .agg({"arrival_time_sec_monotonic": "min"}) + .reset_index() + .query('arrival_time_sec_monotonic==0') + .trip_instance_key + ) # Set arrival times to NaT if it's not monotonically increasing mask = df.arrival_time_sec_monotonic == False @@ -254,7 +213,7 @@ def interpolate_stop_arrivals( dict_inputs = config_path[segment_type] trip_stop_cols = [*dict_inputs["trip_stop_cols"]] USABLE_VP_FILE = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage2b"] + INPUT_FILE = dict_inputs["stage2c"] STOP_ARRIVALS_FILE = dict_inputs["stage3"] start = datetime.datetime.now() From 06770ca6f3781ab669e0a7c2d9a9778ae28f6d2d Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:09:11 +0000 Subject: [PATCH 07/19] (refactor): refactored changes put into scripts --- .../scripts/interpolate_stop_arrival.py | 2 +- .../scripts/nearest_vp_to_stop.py | 50 ++++--- .../scripts/pipeline_rt_stop_times.py | 17 +-- .../scripts/pipeline_segment_speeds.py | 14 -- .../scripts/pipeline_speedmap.py | 12 -- .../segment_speed_utils/neighbor.py | 131 ++---------------- rt_segment_speeds/setup.py | 2 +- 7 files changed, 42 insertions(+), 186 deletions(-) diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index 
c8b5a50c5..4031a0c87 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -213,7 +213,7 @@ def interpolate_stop_arrivals( dict_inputs = config_path[segment_type] trip_stop_cols = [*dict_inputs["trip_stop_cols"]] USABLE_VP_FILE = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage2c"] + INPUT_FILE = dict_inputs["stage2"] STOP_ARRIVALS_FILE = dict_inputs["stage3"] start = datetime.datetime.now() diff --git a/rt_segment_speeds/scripts/nearest_vp_to_stop.py b/rt_segment_speeds/scripts/nearest_vp_to_stop.py index 25e59f742..2ea13aae0 100644 --- a/rt_segment_speeds/scripts/nearest_vp_to_stop.py +++ b/rt_segment_speeds/scripts/nearest_vp_to_stop.py @@ -12,7 +12,6 @@ from typing import Literal, Optional from calitp_data_analysis.geography_utils import WGS84 -from calitp_data_analysis import utils from segment_speed_utils import helpers, neighbor from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from segment_speed_utils.project_vars import SEGMENT_TYPES @@ -155,31 +154,40 @@ def nearest_neighbor_for_stop( else: print(f"{segment_type} is not valid") - gdf = neighbor.merge_stop_vp_for_nearest_neighbor( - stop_times, analysis_date) - - results = neighbor.add_nearest_neighbor_result_array(gdf, analysis_date) - - # Keep columns from results that are consistent across segment types - # use trip_stop_cols as a way to uniquely key into a row - keep_cols = trip_stop_cols + [ - "shape_array_key", - "stop_geometry", - "nearest_vp_arr" - ] - utils.geoparquet_gcs_export( - results[keep_cols], - SEGMENT_GCS, - EXPORT_FILE, + gdf = neighbor.merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) + + vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( + neighbor.subset_arrays_to_valid_directions + )( + gdf.vp_primary_direction, + gdf.vp_geometry, + gdf.vp_idx, + gdf.stop_geometry, + gdf.stop_primary_direction, + gdf.shape_geometry, + gdf.stop_meters ) + + gdf2 = gdf.assign( + 
prior_vp_idx = vp_before, + subseq_vp_idx = vp_after, + prior_vp_meters = vp_before_meters, + subseq_vp_meters = vp_after_meters + )[trip_stop_cols + [ + "shape_array_key", "stop_meters", + "prior_vp_idx", "subseq_vp_idx", + "prior_vp_meters", "subseq_vp_meters"] + ] + + del gdf, stop_times + + gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}.parquet") end = datetime.datetime.now() logger.info(f"nearest neighbor for {segment_type} " - f"{analysis_date}: {end - start}") - - del gdf, stop_times, results - + f"{analysis_date}: {end - start}") + return ''' diff --git a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py index d428933a3..a37199c96 100644 --- a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py +++ b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py @@ -9,7 +9,6 @@ from loguru import logger from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals from update_vars import GTFS_DATA_DICT @@ -36,21 +35,7 @@ ] [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - + logger.remove() diff --git a/rt_segment_speeds/scripts/pipeline_segment_speeds.py b/rt_segment_speeds/scripts/pipeline_segment_speeds.py index df3862fcd..fe8084eba 100644 --- a/rt_segment_speeds/scripts/pipeline_segment_speeds.py +++ b/rt_segment_speeds/scripts/pipeline_segment_speeds.py @@ -9,7 +9,6 @@ from loguru import logger from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals 
from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals from update_vars import GTFS_DATA_DICT @@ -40,19 +39,6 @@ del delayed_dfs - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - logger.remove() LOG_FILE = "../logs/interpolate_stop_arrival.log" diff --git a/rt_segment_speeds/scripts/pipeline_speedmap.py b/rt_segment_speeds/scripts/pipeline_speedmap.py index 545047217..293db2e44 100644 --- a/rt_segment_speeds/scripts/pipeline_speedmap.py +++ b/rt_segment_speeds/scripts/pipeline_speedmap.py @@ -14,7 +14,6 @@ from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals @@ -88,17 +87,6 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( ] [compute(i)[0] for i in delayed_dfs] - - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] logger.remove() diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index f9c19ea2c..f5d21ce84 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -6,61 +6,21 @@ import pandas as pd import shapely -from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import gtfs_schedule_wrangling, helpers, vp_transform +from segment_speed_utils import helpers, vp_transform from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, PROJECT_CRS 
from shared_utils import geo_utils -def add_nearest_vp_idx( - vp_linestring: shapely.LineString, - stop: shapely.Point, - vp_idx_arr: np.ndarray -) -> int: - """ - Index into where the nearest vp is to the stop, - and return that vp_idx value from the vp_idx array. - """ - idx = geo_utils.nearest_snap(vp_linestring, stop, k_neighbors=1) - - return vp_idx_arr[idx] - - def merge_stop_vp_for_nearest_neighbor( stop_times: gpd.GeoDataFrame, analysis_date: str, **kwargs -) -> gpd.GeoDataFrame: - VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line - - vp_condensed = gpd.read_parquet( - f"{SEGMENT_GCS}{VP_NN}_{analysis_date}.parquet", - columns = ["trip_instance_key", - "vp_idx", "vp_primary_direction", - "geometry"], - **kwargs - ).to_crs(WGS84) - - gdf = pd.merge( - stop_times.rename( - columns = {"geometry": "stop_geometry"} - ).set_geometry("stop_geometry").to_crs(WGS84), - vp_condensed.rename( - columns = { - "geometry": "vp_geometry" - }), - on = "trip_instance_key", - how = "inner" - ) - - return gdf - - -def new_merge_stop_vp_for_nearest_neighbor( - stop_times: gpd.GeoDataFrame, - analysis_date: str, - **kwargs ): + """ + Merge stop times file with vp. + vp gdf has been condensed so that all the vp coords + make up coordinates of a linestring. 
+ """ VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line vp_condensed = gpd.read_parquet( @@ -93,6 +53,9 @@ def new_merge_stop_vp_for_nearest_neighbor( how = "inner" ) + # Calculate stop_meters, which is the stop geometry + # projected onto shape_geometry and is interpreted as + # stop X is Y meters along shape gdf = gdf.assign( stop_meters = gdf.shape_geometry.project(gdf.stop_geometry) ) @@ -100,80 +63,6 @@ def new_merge_stop_vp_for_nearest_neighbor( return gdf -def subset_arrays_to_valid_directions( - vp_direction_array: np.ndarray, - vp_geometry: shapely.LineString, - vp_idx_array: np.ndarray, - stop_geometry: shapely.Point, - stop_direction: str, -) -> np.ndarray: - """ - Each row stores several arrays related to vp. - vp_direction is an array, vp_idx is an array, - and the linestring of vp coords can be coerced into an array. - - When we're doing nearest neighbor search, we want to - first filter the full array down to valid vp - before snapping it. - """ - N_NEAREST_POINTS = 10 - - opposite_direction = vp_transform.OPPOSITE_DIRECTIONS[stop_direction] - - # These are the valid index values where opposite direction - # is excluded - valid_indices = (vp_direction_array != opposite_direction).nonzero() - - vp_coords_line = np.array(vp_geometry.coords)[valid_indices] - - vp_idx_arr = np.asarray(vp_idx_array)[valid_indices] - - np_inds = geo_utils.nearest_snap( - vp_coords_line, stop_geometry, N_NEAREST_POINTS - ) - - # nearest neighbor returns self.N - # if there are no nearest neighbor results found - # if we want 10 nearest neighbors and 8th, 9th, 10th are all - # the same result, the 8th will have a result, then 9th and 10th will - # return the length of the array (which is out-of-bounds) - np_inds2 = np_inds[np_inds < vp_idx_arr.size] - - nearest_vp_arr = vp_idx_arr[np_inds2] - - return nearest_vp_arr - - -def add_nearest_neighbor_result_array( - gdf: gpd.GeoDataFrame, - analysis_date: str, - **kwargs -) -> pd.DataFrame: - """ - Add the nearest 
k_neighbors result. - """ - nearest_vp_arr_series = [] - - for row in gdf.itertuples(): - - nearest_vp_arr = subset_arrays_to_valid_directions( - getattr(row, "vp_primary_direction"), - getattr(row, "vp_geometry"), - getattr(row, "vp_idx"), - getattr(row, "stop_geometry"), - getattr(row, "stop_primary_direction"), - ) - - nearest_vp_arr_series.append(nearest_vp_arr) - - gdf2 = gdf.assign( - nearest_vp_arr = nearest_vp_arr_series - ).drop(columns = ["vp_primary_direction", "vp_idx", "vp_geometry"]) - - return gdf2 - - - def find_nearest_points( vp_coords_line: np.ndarray, target_stop: shapely.Point, @@ -256,7 +145,7 @@ def filter_to_nearest2_vp( return before_idx, after_idx, before_vp_meters, after_vp_meters -def new_subset_arrays_to_valid_directions( +def subset_arrays_to_valid_directions( vp_direction_array: np.ndarray, vp_geometry: shapely.LineString, vp_idx_array: np.ndarray, diff --git a/rt_segment_speeds/setup.py b/rt_segment_speeds/setup.py index be6555fa4..ee07f9071 100644 --- a/rt_segment_speeds/setup.py +++ b/rt_segment_speeds/setup.py @@ -3,7 +3,7 @@ setup( name="segment_speed_utils", packages=find_packages(), - version="1.6", + version="1.7", description="Utility functions for GTFS RT segment speeds", author="Cal-ITP", license="Apache", From 493712eaec2c461ff508cd86ef93d05f03055cc7 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:09:56 +0000 Subject: [PATCH 08/19] (refactor): stage2, 2b now consolidated as stage2 --- _shared_utils/shared_utils/gtfs_analytics_data.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index 96ffcb1b7..712d31dc1 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -73,8 +73,6 @@ stop_segments: dir: ${gcs_paths.SEGMENT_GCS} stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_shape_segments" - stage2b: 
"nearest/nearest2_vp_shape_segments" - stage2c: "nearest/wide_nearest2_vp_shape_segments" stage3: "stop_arrivals" stage4: "speeds_stop_segments" trip_stop_cols: ["trip_instance_key", "stop_sequence"] @@ -82,7 +80,7 @@ stop_segments: stop_pair_cols: ["stop_pair", "stop_pair_name"] route_dir_cols: ["route_id", "direction_id"] segment_cols: ["route_id", "direction_id", "stop_pair", "geometry"] - shape_stop_single_segment: "rollup_singleday/speeds_shape_stop_segments" #-- stop after Oct 2024 + #shape_stop_single_segment: "rollup_singleday/speeds_shape_stop_segments" #-- stop after Oct 2024 route_dir_single_segment: "rollup_singleday/speeds_route_dir_segments" route_dir_single_segment_detail: "rollup_singleday/speeds_route_dir_segments_detail" # interim for speedmaps route_dir_multi_segment: "rollup_multiday/speeds_route_dir_segments" @@ -95,7 +93,6 @@ rt_stop_times: dir: ${gcs_paths.SEGMENT_GCS} stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_rt_stop_times" - stage2b: "nearest/nearest2_vp_rt_stop_times" stage3: "rt_stop_times/stop_arrivals" stage4: "rt_stop_times/speeds" trip_stop_cols: ["trip_instance_key", "stop_sequence"] @@ -116,7 +113,6 @@ speedmap_segments: stage1: ${speeds_tables.vp_dwell} proxy_stop_times: "stop_time_expansion/speedmap_stop_times" stage2: "nearest/nearest_vp_speedmap_proxy" - stage2b: "nearest/nearest2_vp_speedmap_proxy" stage3: "speedmap/stop_arrivals_proxy" stage3b: "speedmap/stop_arrivals" stage4: "speedmap/speeds" From 3f04352347083b69de86321d20988add36679507 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:11:35 +0000 Subject: [PATCH 09/19] (remove): testing script consolidating nearest neighbor intermediate steps --- .../scripts/interpolate_stop_arrival.py | 1 - .../scripts/new_nearest_vp_and_filter.py | 142 ------------------ 2 files changed, 143 deletions(-) delete mode 100644 rt_segment_speeds/scripts/new_nearest_vp_and_filter.py diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py 
b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index 4031a0c87..fafb920b8 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -95,7 +95,6 @@ def add_arrival_time( getattr(row, "subseq_location_timestamp_local"), ]) - interpolated_arrival = segment_calcs.interpolate_stop_arrival_time( stop_position, projected_points, timestamp_arr) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py deleted file mode 100644 index 9a9b21fff..000000000 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ /dev/null @@ -1,142 +0,0 @@ -import datetime -import geopandas as gpd -import numpy as np -import pandas as pd -import shapely -import sys - -from loguru import logger - -from shared_utils import rt_dates -from segment_speed_utils import helpers, neighbor -from update_vars import SEGMENT_GCS, SHARED_GCS, GTFS_DATA_DICT -from segment_speed_utils.project_vars import PROJECT_CRS - - -def stop_times_for_shape_segments( - analysis_date: str, - dict_inputs: dict -) -> gpd.GeoDataFrame: - """ - This is the stop times table using only 1 shape for each - route-direction. Every trip belong to that shape - will be cut along the same stops. - This allows us to aggregate segments across trips because each - segment has the same stop_id1 and stop_id2. 
- """ - SEGMENT_FILE = dict_inputs["segments_file"] - - rt_trips = helpers.import_unique_vp_trips(analysis_date) - - shape_stop_combinations = pd.read_parquet( - f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet", - columns = ["trip_instance_key", - "stop_id1", "stop_pair", - "st_trip_instance_key"], - filters = [[ - ("trip_instance_key", "in", rt_trips) - ]] - ).rename(columns = {"stop_id1": "stop_id"}) - - subset_trips = shape_stop_combinations.st_trip_instance_key.unique() - - stops_to_use = helpers.import_scheduled_stop_times( - analysis_date, - columns = ["trip_instance_key", "shape_array_key", - "stop_sequence", "stop_id", "stop_pair", - "stop_primary_direction", "geometry"], - filters = [[("trip_instance_key", "in", subset_trips)]], - get_pandas = True, - with_direction = True - ).rename(columns = {"trip_instance_key": "st_trip_instance_key"}) - - stop_times = pd.merge( - stops_to_use, - shape_stop_combinations, - on = ["st_trip_instance_key", "stop_id", "stop_pair"], - how = "inner" - ).drop( - columns = "st_trip_instance_key" - ).drop_duplicates().reset_index(drop=True) - - return stop_times - - -def new_nearest_neighbor_for_stop( - analysis_date: str, - segment_type: str, - config_path = GTFS_DATA_DICT -): - """ - """ - start = datetime.datetime.now() - - dict_inputs = config_path[segment_type] - trip_stop_cols = [*dict_inputs["trip_stop_cols"]] - EXPORT_FILE = dict_inputs["stage2c"] - - stop_times = stop_times_for_shape_segments( - analysis_date, - dict_inputs - ) - - gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - - vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( - neighbor.new_subset_arrays_to_valid_directions - )( - gdf.vp_primary_direction, - gdf.vp_geometry, - gdf.vp_idx, - gdf.stop_geometry, - gdf.stop_primary_direction, - gdf.shape_geometry, - gdf.stop_meters - ) - - gdf2 = gdf.assign( - prior_vp_idx = vp_before, - subseq_vp_idx = vp_after, - prior_vp_meters = vp_before_meters, - 
subseq_vp_meters = vp_after_meters - )[trip_stop_cols + [ - "shape_array_key", "stop_meters", - "prior_vp_idx", "subseq_vp_idx", - "prior_vp_meters", "subseq_vp_meters"] - ] - - del gdf, stop_times - - gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet") - - end = datetime.datetime.now() - logger.info(f"nearest neighbor for {segment_type} " - f"{analysis_date}: {end - start}") - - return - - - -if __name__ == "__main__": - - #from segment_speed_utils.project_vars import analysis_date_list - - from dask import delayed, compute - LOG_FILE = "../logs/test.log" - logger.add(LOG_FILE, retention="3 months") - logger.add(sys.stderr, - format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", - level="INFO") - - analysis_date_list = [rt_dates.DATES["sep2024"]] - segment_type = "stop_segments" - - delayed_dfs = [ - delayed(new_nearest_neighbor_for_stop)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] From 406035a47dffa825c559620bb6cba43569d4d43c Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 18:51:00 +0000 Subject: [PATCH 10/19] (open_data): update readme to include 2 speed layers --- open_data/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/open_data/README.md b/open_data/README.md index ecb672b5d..48e73d5f6 100644 --- a/open_data/README.md +++ b/open_data/README.md @@ -5,6 +5,8 @@ 1. [HQTA Stops](https://gis.data.ca.gov/datasets/f6c30480f0e84be699383192c099a6a4_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_HQ_Transit_Stops/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_HQ_Transit_Stops/MapServer) 1. 
[CA Transit Routes](https://gis.data.ca.gov/datasets/dd7cb74665a14859a59b8c31d3bc5a3e_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Routes/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Routes/MapServer) 1. [CA Transit Stops](https://gis.data.ca.gov/datasets/900992cc94ab49dbbb906d8f147c2a72_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Stops/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Stops/MapServer) +1. [CA Average Transit Speeds by Stop-to-Stop Segments](https://gis.data.ca.gov/datasets/4937eeb59fdb4e56ae75e64688c7f2c0_0/): metadata [feature server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Stop_Segments/FeatureServer/0) or [map server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Stop_Segments/MapServer/0) +1. [CA Average Transit Speeds by Route and Time of Day](https://gis.data.ca.gov/datasets/071df783099f4224b7ebb54839eae007_0/): metadata [feature server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Route_Time_of_Day/FeatureServer/0) or [map server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Route_Time_of_Day/MapServer/0) 1. 
All GTFS datasets [metadata/data dictionary](https://data.ca.gov/dataset/cal-itp-gtfs-ingest-pipeline-dataset/resource/e26bf6ee-419d-4a95-8e4c-e2b13d5de793) ## GTFS Schedule Routes & Stops Geospatial Data From 9fe9735286c24f0f5a0e5c2567e7cbbdfd0b8750 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 01:26:34 +0000 Subject: [PATCH 11/19] (remove): no longer need filtering nearest 10 to 2 vp script --- rt_segment_speeds/scripts/vp_around_stops.py | 269 ------------------- 1 file changed, 269 deletions(-) delete mode 100644 rt_segment_speeds/scripts/vp_around_stops.py diff --git a/rt_segment_speeds/scripts/vp_around_stops.py b/rt_segment_speeds/scripts/vp_around_stops.py deleted file mode 100644 index 9f76378e0..000000000 --- a/rt_segment_speeds/scripts/vp_around_stops.py +++ /dev/null @@ -1,269 +0,0 @@ -""" -Filter the nearest 10 neighbors down to the -nearest 2 neighbors for each stop position. -Attach the projected stop position against shape, -projected vp position against shape, and timestamps. -""" -import datetime -import geopandas as gpd -import pandas as pd -import sys - -from dask import delayed, compute -from loguru import logger -from pathlib import Path -from typing import Literal, Optional - -from segment_speed_utils import helpers -from shared_utils import geo_utils -from update_vars import SEGMENT_GCS, GTFS_DATA_DICT -from segment_speed_utils.project_vars import SEGMENT_TYPES, PROJECT_CRS - -def stops_projected_against_shape( - input_file: str, - analysis_date: str, - trip_stop_cols: list, -) -> pd.DataFrame: - """ - From nearest 10 vp points, project the stop geometry - onto shape geometry and get - stop_meters. 
- """ - stop_position = gpd.read_parquet( - f"{SEGMENT_GCS}{input_file}_{analysis_date}.parquet", - columns = trip_stop_cols + [ - "shape_array_key", "stop_geometry"], - ).to_crs(PROJECT_CRS) - - shapes = helpers.import_scheduled_shapes( - analysis_date, - columns = ["shape_array_key", "geometry"], - crs = PROJECT_CRS, - get_pandas = True - ) - - gdf = pd.merge( - stop_position, - shapes.rename(columns = {"geometry": "shape_geometry"}), - on = "shape_array_key", - how = "inner" - ) - - gdf = gdf.assign( - stop_meters = gdf.shape_geometry.project(gdf.stop_geometry), - )[trip_stop_cols + ["stop_meters"]] - - del shapes, stop_position - - return gdf - - -def explode_vp_nearest( - input_file: str, - analysis_date: str, - trip_stop_cols: list, -) -> pd.DataFrame: - """ - Take nearest 10 vp, which holds vp_idx as an array, - and explode it so it becomes long. - """ - vp_nearest = pd.read_parquet( - f"{SEGMENT_GCS}{input_file}_{analysis_date}.parquet", - columns = trip_stop_cols + [ - "shape_array_key", - "nearest_vp_arr"], - ).explode( - "nearest_vp_arr" - ).drop_duplicates().reset_index( - drop=True - ).rename( - columns = {"nearest_vp_arr": "vp_idx"} - ).astype({"vp_idx": "int64"}) - - return vp_nearest - - -def get_vp_projected_against_shape( - input_file: str, - analysis_date: str, - **kwargs -) -> pd.DataFrame: - """ - Put in subset of vp_idx (using the kwargs) - and turn the x, y into vp point geometry. - Merge in shapes and project the vp position - against shape geometry, and save out - shape_meters. 
- """ - # Get crosswalk of trip to shapes - trips_to_shapes = helpers.import_scheduled_trips( - analysis_date, - columns = ["trip_instance_key", "shape_array_key"], - get_pandas = True - ) - - # Get shapes - shapes = helpers.import_scheduled_shapes( - analysis_date, - columns = ["shape_array_key", "geometry"], - crs = PROJECT_CRS, - get_pandas = True - ) - - # Subset usable vp with only the ones present in exploded vp - # and turn those into vp geometry - vp = pd.read_parquet( - f"{SEGMENT_GCS}{input_file}_{analysis_date}", - columns = ["trip_instance_key", "vp_idx", "x", "y"], - **kwargs - ).pipe(geo_utils.vp_as_gdf, crs = PROJECT_CRS) - - # Merge all together so we can project vp point goem - # against shape line geom - gdf = pd.merge( - vp.rename(columns = {"geometry": "vp_geometry"}), - trips_to_shapes, - on = "trip_instance_key", - how = "inner" - ).merge( - shapes.rename(columns = {"geometry": "shape_geometry"}), - on = "shape_array_key", - how = "inner" - ).set_geometry("vp_geometry") - - del trips_to_shapes, shapes, vp - - gdf = gdf.assign( - shape_meters = gdf.shape_geometry.project(gdf.vp_geometry), - )[["vp_idx", "shape_meters"]] - - return gdf - - -def find_two_closest_vp( - df: pd.DataFrame, - group_cols: list -) -> pd.DataFrame: - """ - Based on the distances calculated between vp and stop, - keep the 2 observations that are closest. Find the smallest - positive distance and negative distance. - - This filters down the nearest 10 into nearest 2. 
- """ - positive_distances_df = df.loc[df.stop_vp_distance_meters >= 0] - negative_distances_df = df.loc[df.stop_vp_distance_meters < 0] - - #https://github.com/pandas-dev/pandas/issues/45089 - # add dropna=False or else too many combos are lost - min_pos_distance = ( - positive_distances_df - .groupby(group_cols, - observed=True, group_keys=False, dropna=False) - .agg({"stop_vp_distance_meters": "min"}) - .reset_index() - ) - - min_neg_distance = ( - negative_distances_df - .groupby(group_cols, - observed=True, group_keys=False, dropna=False) - .agg({"stop_vp_distance_meters": "max"}) - .reset_index() - ) - - two_vp = pd.concat( - [min_pos_distance, min_neg_distance], - axis=0, ignore_index=True - ) - - return two_vp - - -def filter_to_nearest_two_vp( - analysis_date: str, - segment_type: Literal[SEGMENT_TYPES], - config_path: Optional[Path] = GTFS_DATA_DICT -): - dict_inputs = config_path[segment_type] - trip_stop_cols = [*dict_inputs["trip_stop_cols"]] - USABLE_VP_FILE = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage2"] - EXPORT_FILE = dict_inputs["stage2b"] - - start = datetime.datetime.now() - - stop_meters_df = delayed(stops_projected_against_shape)( - INPUT_FILE, analysis_date, trip_stop_cols) - - vp_nearest = delayed(explode_vp_nearest)( - INPUT_FILE, analysis_date, trip_stop_cols) - - subset_vp = vp_nearest.vp_idx.unique() - - vp_meters_df = delayed(get_vp_projected_against_shape)( - USABLE_VP_FILE, - analysis_date, - filters = [[("vp_idx", "in", subset_vp)]] - ) - - gdf = delayed(pd.merge)( - vp_nearest, - stop_meters_df, - on = trip_stop_cols, - how = "inner" - ).merge( - vp_meters_df, - on = "vp_idx", - how = "inner" - ) - - # Calculate the distance between the stop and vp position - # This is used to find the minimum positive and minimum negative - # distance (get at vp before and after stop) - gdf = gdf.assign( - stop_meters = gdf.stop_meters.round(3), - shape_meters = gdf.shape_meters.round(3), - stop_vp_distance_meters = (gdf.stop_meters - 
gdf.shape_meters).round(2) - ) - - gdf_filtered = delayed(find_two_closest_vp)(gdf, trip_stop_cols) - - gdf2 = delayed(pd.merge)( - gdf, - gdf_filtered, - on = trip_stop_cols + ["stop_vp_distance_meters"], - how = "inner" - ) - - gdf2 = compute(gdf2)[0] - - del gdf, gdf_filtered, vp_nearest, stop_meters_df, vp_meters_df - - gdf2.to_parquet( - f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet", - ) - - end = datetime.datetime.now() - logger.info(f"nearest 2 vp for {segment_type} " - f"{analysis_date}: {end - start}") - - del gdf2 - - return - -''' -if __name__ == "__main__": - - from segment_speed_utils.project_vars import analysis_date_list - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] -''' \ No newline at end of file From 873a97cc80e6aacade880757160a74cb21c6c59e Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 01:31:04 +0000 Subject: [PATCH 12/19] (remove): array_utils, move into segment_calcs --- .../segment_speed_utils/array_utils.py | 37 ------------------- 1 file changed, 37 deletions(-) delete mode 100644 rt_segment_speeds/segment_speed_utils/array_utils.py diff --git a/rt_segment_speeds/segment_speed_utils/array_utils.py b/rt_segment_speeds/segment_speed_utils/array_utils.py deleted file mode 100644 index 295aae47e..000000000 --- a/rt_segment_speeds/segment_speed_utils/array_utils.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Functions for working with numpy arrays. 
-""" -import numpy as np -import pandas as pd - -from numba import jit - -def rolling_window_make_array( - df: pd.DataFrame, - window: int, - rolling_col: str -) -> pd.DataFrame: - # https://stackoverflow.com/questions/47482009/pandas-rolling-window-to-return-an-array - df[f"rolling_{rolling_col}"] = [ - np.asarray(window) for window in - df.groupby("trip_instance_key")[rolling_col].rolling( - window = window, center=True) - ] - - is_monotonic_series = np.vectorize(monotonic_check)(df[f"rolling_{rolling_col}"]) - df[f"{rolling_col}_monotonic"] = is_monotonic_series - - return df - -@jit(nopython=True) -def monotonic_check(arr: np.ndarray) -> bool: - """ - For an array, check if it's monotonically increasing. - https://stackoverflow.com/questions/4983258/check-list-monotonicity - """ - diff_arr = np.diff(arr) - - if np.all(diff_arr > 0): - return True - else: - return False \ No newline at end of file From 8255c381d4c4eca68af3404540923cbaa0026edd Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 01:31:37 +0000 Subject: [PATCH 13/19] (segment_speed_utils): remove unused functions --- .../segment_speed_utils/segment_calcs.py | 31 ++++++++++++++++++- .../segment_speed_utils/vp_transform.py | 17 +--------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/rt_segment_speeds/segment_speed_utils/segment_calcs.py b/rt_segment_speeds/segment_speed_utils/segment_calcs.py index 9bfac4613..004490ece 100644 --- a/rt_segment_speeds/segment_speed_utils/segment_calcs.py +++ b/rt_segment_speeds/segment_speed_utils/segment_calcs.py @@ -1,3 +1,6 @@ +""" +Functions related to calculating segment speeds. 
+""" import dask.dataframe as dd import dask_geopandas as dg import geopandas as gpd @@ -149,4 +152,30 @@ def interpolate_stop_arrival_time( return np.interp( stop_position, np.asarray(shape_meters_arr), timestamp_arr - ).astype("datetime64[s]") \ No newline at end of file + ).astype("datetime64[s]") + + +def rolling_window_make_array( + df: pd.DataFrame, + window: int, + rolling_col: str +) -> pd.DataFrame: + """ + Interpolated stop arrival times are checked + to see if they are monotonically increasing. + If it isn't, it gets recalculated based on + stop_meters (the stop's position) relative to + other stop arrival times. + + https://stackoverflow.com/questions/47482009/pandas-rolling-window-to-return-an-array + """ + df[f"rolling_{rolling_col}"] = [ + np.asarray(window) for window in + df.groupby("trip_instance_key")[rolling_col].rolling( + window = window, center=True) + ] + + is_monotonic_series = np.vectorize(monotonic_check)(df[f"rolling_{rolling_col}"]) + df[f"{rolling_col}_monotonic"] = is_monotonic_series + + return df \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/vp_transform.py b/rt_segment_speeds/segment_speed_utils/vp_transform.py index 48694b585..bc41a9a90 100644 --- a/rt_segment_speeds/segment_speed_utils/vp_transform.py +++ b/rt_segment_speeds/segment_speed_utils/vp_transform.py @@ -53,19 +53,4 @@ def condense_point_geom_to_line( .reset_index() ) - return df3 - - -def sort_by_vp_idx_order( - vp_idx_array: np.ndarray, - geometry_array: np.ndarray, - timestamp_array: np.ndarray, -) -> tuple[np.ndarray]: - - sort_order = np.argsort(vp_idx_array, axis=0) - - vp_sorted = np.take_along_axis(vp_idx_array, sort_order, axis=0) - geom_sorted = np.take_along_axis(geometry_array, sort_order, axis=0) - timestamp_sorted = np.take_along_axis(timestamp_array, sort_order, axis=0) - - return vp_sorted, geom_sorted, timestamp_sorted \ No newline at end of file + return df3 \ No newline at end of file From 
d17d8b309507f3447c7746d20e4af8ca30ac89d1 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 01:31:55 +0000 Subject: [PATCH 14/19] update init after array_utils removed --- rt_segment_speeds/segment_speed_utils/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/rt_segment_speeds/segment_speed_utils/__init__.py b/rt_segment_speeds/segment_speed_utils/__init__.py index 4d10f7d0e..d7e66563e 100644 --- a/rt_segment_speeds/segment_speed_utils/__init__.py +++ b/rt_segment_speeds/segment_speed_utils/__init__.py @@ -1,5 +1,4 @@ from . import ( - array_utils, gtfs_schedule_wrangling, helpers, metrics, @@ -12,7 +11,6 @@ ) __all__ = [ - "array_utils", "gtfs_schedule_wrangling", "helpers", "metrics", From bc6469835495caade0d52dea926bfdf0ab74ca58 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 16:17:55 +0000 Subject: [PATCH 15/19] rename functions for clarity, test sep-nov2024 dates for nearest vp step --- .../scripts/interpolate_stop_arrival.py | 5 +- .../scripts/nearest_vp_to_stop.py | 3 +- .../scripts/pipeline_rt_stop_times.py | 5 +- .../scripts/pipeline_segment_speeds.py | 7 +-- .../scripts/pipeline_speedmap.py | 7 +-- .../segment_speed_utils/neighbor.py | 54 +++++++++---------- .../segment_speed_utils/segment_calcs.py | 14 +++++ 7 files changed, 54 insertions(+), 41 deletions(-) diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index fafb920b8..0495893e9 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -17,8 +17,7 @@ from pathlib import Path from typing import Literal, Optional -from segment_speed_utils import (array_utils, helpers, - segment_calcs) +from segment_speed_utils import helpers, segment_calcs from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from segment_speed_utils.project_vars import PROJECT_CRS, SEGMENT_TYPES from shared_utils import rt_dates @@ -166,7 +165,7 
@@ def enforce_monotonicity_and_interpolate_across_stops( df = segment_calcs.convert_timestamp_to_seconds( df, ["arrival_time"]) - df = array_utils.rolling_window_make_array( + df = segment_calcs.rolling_window_make_array( df, window = 3, rolling_col = "arrival_time_sec" ) diff --git a/rt_segment_speeds/scripts/nearest_vp_to_stop.py b/rt_segment_speeds/scripts/nearest_vp_to_stop.py index 2ea13aae0..6706637b4 100644 --- a/rt_segment_speeds/scripts/nearest_vp_to_stop.py +++ b/rt_segment_speeds/scripts/nearest_vp_to_stop.py @@ -4,6 +4,7 @@ """ import datetime import geopandas as gpd +import numpy as np import pandas as pd import sys @@ -158,7 +159,7 @@ def nearest_neighbor_for_stop( gdf = neighbor.merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( - neighbor.subset_arrays_to_valid_directions + neighbor.two_nearest_neighbor_near_stop )( gdf.vp_primary_direction, gdf.vp_geometry, diff --git a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py index a37199c96..39cc7567e 100644 --- a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py +++ b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py @@ -38,7 +38,7 @@ logger.remove() - + LOG_FILE = "../logs/interpolate_stop_arrival.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -57,7 +57,7 @@ logger.remove() - + LOG_FILE = "../logs/speeds_by_segment_trip.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -75,3 +75,4 @@ [compute(i)[0] for i in delayed_dfs] logger.remove() + \ No newline at end of file diff --git a/rt_segment_speeds/scripts/pipeline_segment_speeds.py b/rt_segment_speeds/scripts/pipeline_segment_speeds.py index fe8084eba..fa7922c14 100644 --- a/rt_segment_speeds/scripts/pipeline_segment_speeds.py +++ b/rt_segment_speeds/scripts/pipeline_segment_speeds.py @@ -17,7 +17,7 @@ if __name__ == "__main__": from 
segment_speed_utils.project_vars import analysis_date_list - + segment_type = "stop_segments" print(f"segment_type: {segment_type}") @@ -59,7 +59,7 @@ logger.remove() - + LOG_FILE = "../logs/speeds_by_segment_trip.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -76,4 +76,5 @@ [compute(i)[0] for i in delayed_dfs] - logger.remove() \ No newline at end of file + logger.remove() + \ No newline at end of file diff --git a/rt_segment_speeds/scripts/pipeline_speedmap.py b/rt_segment_speeds/scripts/pipeline_speedmap.py index 293db2e44..86997ebfe 100644 --- a/rt_segment_speeds/scripts/pipeline_speedmap.py +++ b/rt_segment_speeds/scripts/pipeline_speedmap.py @@ -90,7 +90,7 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( logger.remove() - + LOG_FILE = "../logs/interpolate_stop_arrival.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -109,7 +109,7 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( logger.remove() - + t0 = datetime.datetime.now() delayed_dfs = [ delayed(concatenate_speedmap_proxy_arrivals_with_remaining)( @@ -139,4 +139,5 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( [compute(i)[0] for i in delayed_dfs] - logger.remove() \ No newline at end of file + logger.remove() + \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index f5d21ce84..2e3f9a307 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -15,7 +15,7 @@ def merge_stop_vp_for_nearest_neighbor( stop_times: gpd.GeoDataFrame, analysis_date: str, **kwargs -): +) -> gpd.GeoDataFrame: """ Merge stop times file with vp. 
vp gdf has been condensed so that all the vp coords @@ -64,7 +64,7 @@ def merge_stop_vp_for_nearest_neighbor( def find_nearest_points( - vp_coords_line: np.ndarray, + vp_coords_array: np.ndarray, target_stop: shapely.Point, vp_idx_array: np.ndarray, ) -> np.ndarray: @@ -79,7 +79,7 @@ def find_nearest_points( (ex: nearest 5 vp to each stop). """ indices = geo_utils.nearest_snap( - vp_coords_line, + vp_coords_array, target_stop, k_neighbors = 5 ) @@ -89,17 +89,17 @@ def find_nearest_points( # if we want 10 nearest neighbors and 8th, 9th, 10th are all # the same result, the 8th will have a result, then 9th and 10th will # return the length of the array (which is out-of-bounds) + # using vp_coords_array keeps too many points (is this because coords can be dupes?) indices2 = indices[indices < vp_idx_array.size] return indices2 def filter_to_nearest2_vp( - vp_coords_line: np.ndarray, + nearest_vp_coords_array: np.ndarray, shape_geometry: shapely.LineString, - vp_idx_array: np.ndarray, + nearest_vp_idx_array: np.ndarray, stop_meters: float, - indices_of_nearest: np.ndarray, ) -> tuple[np.ndarray]: """ Take the indices that are the nearest. @@ -109,16 +109,11 @@ def filter_to_nearest2_vp( Filter down to the nearest 2 vp before and after a stop. If there isn't one before or after, a value of -1 is returned. """ - # Subset the array of vp coords and vp_idx_array with - # the indices that show the nearest k neighbors. 
- nearest_vp = vp_coords_line[indices_of_nearest] - nearest_vp_idx = vp_idx_array[indices_of_nearest] - # Project these vp coords to shape geometry and see how far it is # from the stop's position on the shape nearest_vp_projected = np.asarray( [shape_geometry.project(shapely.Point(i)) - for i in nearest_vp] + for i in nearest_vp_coords_array] ) # Negative values are before the stop @@ -126,26 +121,28 @@ def filter_to_nearest2_vp( before_indices = np.where(nearest_vp_projected - stop_meters < 0)[0] after_indices = np.where(nearest_vp_projected - stop_meters > 0)[0] + # Set missing values when we're not able to find a nearest neighbor result + # use -1 as vp_idx (since this is not present in vp_usable) + # and zeroes for meters + before_idx = -1 + after_idx = -1 + before_vp_meters = 0 + after_vp_meters = 0 + # Grab the closest vp before a stop (-1 means array was empty) if before_indices.size > 0: - before_idx = nearest_vp_idx[before_indices][-1] + before_idx = nearest_vp_idx_array[before_indices][-1] before_vp_meters = nearest_vp_projected[before_indices][-1] - else: - before_idx = -1 - before_vp_meters = 0 - + # Grab the closest vp after a stop (-1 means array was empty) if after_indices.size > 0: - after_idx = nearest_vp_idx[after_indices][0] + after_idx = nearest_vp_idx_array[after_indices][0] after_vp_meters = nearest_vp_projected[after_indices][0] - else: - after_idx = -1 - after_vp_meters = 0 return before_idx, after_idx, before_vp_meters, after_vp_meters -def subset_arrays_to_valid_directions( +def two_nearest_neighbor_near_stop( vp_direction_array: np.ndarray, vp_geometry: shapely.LineString, vp_idx_array: np.ndarray, @@ -170,23 +167,22 @@ def subset_arrays_to_valid_directions( valid_indices = (vp_direction_array != opposite_direction).nonzero() # These are vp coords where index values of opposite direction is excluded - valid_vp_coords_line = np.array(vp_geometry.coords)[valid_indices] + valid_vp_coords_array = np.array(vp_geometry.coords)[valid_indices] # 
These are the subset of vp_idx values where opposite direction is excluded - valid_vp_idx_arr = np.asarray(vp_idx_array)[valid_indices] + valid_vp_idx_array = np.asarray(vp_idx_array)[valid_indices] nearest_indices = find_nearest_points( - valid_vp_coords_line, + valid_vp_coords_array, stop_geometry, - valid_vp_idx_arr, + valid_vp_idx_array, ) before_vp, after_vp, before_meters, after_meters = filter_to_nearest2_vp( - valid_vp_coords_line, + valid_vp_idx_array[nearest_indices], # subset of coords in nn shape_geometry, - valid_vp_idx_arr, + valid_vp_idx_array[nearest_indices], # subset of vp_idx in nn stop_meters, - nearest_indices, ) return before_vp, after_vp, before_meters, after_meters \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/segment_calcs.py b/rt_segment_speeds/segment_speed_utils/segment_calcs.py index 004490ece..4fb21c5b8 100644 --- a/rt_segment_speeds/segment_speed_utils/segment_calcs.py +++ b/rt_segment_speeds/segment_speed_utils/segment_calcs.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +from numba import jit from typing import Union from shared_utils.rt_utils import MPH_PER_MPS @@ -155,6 +156,19 @@ def interpolate_stop_arrival_time( ).astype("datetime64[s]") +@jit(nopython=True) +def monotonic_check(arr: np.ndarray) -> bool: + """ + For an array, check if it's monotonically increasing. + https://stackoverflow.com/questions/4983258/check-list-monotonicity + """ + diff_arr = np.diff(arr) + + if np.all(diff_arr > 0): + return True + else: + return False + def rolling_window_make_array( df: pd.DataFrame, window: int, From 6a6ab6827f2b421b0fc70f86959acd6d1e5dd1f7 Mon Sep 17 00:00:00 2001 From: csuyat-dot Date: Thu, 19 Dec 2024 21:35:49 +0000 Subject: [PATCH 16/19] updated cover sheet template. started explore NB to update the save rtpa outputs function. mainly, changes to the excel writer portion so it only writes data related to the RTPA, not the entire dataset. 
--- ntd/cover_sheet_template.xlsx | Bin 12444 -> 12439 bytes ntd/explore_save_rtpa_outputs_update.ipynb | 307 +++++++++++++++++++++ 2 files changed, 307 insertions(+) create mode 100644 ntd/explore_save_rtpa_outputs_update.ipynb diff --git a/ntd/cover_sheet_template.xlsx b/ntd/cover_sheet_template.xlsx index df54954fee4f761f4e44f17d4da8d6c5dc9e087c..03cb9c6ee6cb7b2e473df2029bc836a23b62426b 100644 GIT binary patch delta 6000 zcmZ8_bx_>Rvi9P^B@iS?a0s@+f&{l9yFdu;?(X^nf(CaF?(Q03aS2XvC-{aC+_}8x z+s-I<0dhBZs5fbWG9g&z_KH2MMr!Ucgq9yT2A_D^w)vNfYa(@qBco{UTj@Tj{hok8txHrBKZnL0-+Wgkr*V<>cRE`!z-n!} z^-^WnRvX@+#@koago6erbte1}4`J?xByBt~YnlK*{o1PR$@F`YmXNUGTrQ_I7tr5s zD+YaOC0*;&ll$78zz?%{m#CggfOI!;FRmtdVMf+KfUnH3bwh|_2VFEndm8O8?Wp9w z1O{&?KOfF84~nl@S%|k9A`nlsX1w_xZ1TWKhG-e3g--36EqQjiV)FoRFyZ;WD5p-V zdzHjp_NlSzXF-Z3jE-%$4OhOih>EvwQVOXYW@WrC*5=uia+WlSp%tGCB(vb1BI9NS zI2w45ybP)>p(kRCVHfGGC|nS)C)-f*)t`4ja)5Kj`ahYsEi|{uwj@yT=hmxS9aK6~>@~ToLR2GO#?C)_gpgR3 z+S+MqpIqp?kHr@I?xRyBv(3h0f--`^O03i54Jch|AQ=BLZbZiha#F`G=4WGc-n)_0 zo=_}15WnN4AxDwk%GU&;Ye+_zn3#0tbL-`b+GaMoCHnr@B$%)Ixr0I6ls=$vl>XwU z*j5LLsU37;w_sfC$cl^t$z(AABg^iW6vrT`$$&qtQ)RF{N|j+@w0TQ?H?J9cP1I^1 zREX*;p=a60Lm|9JfU0@3a7 zL{fGPR@$#M)*iCE9>Q{4v!iQ6WhbMGYf@a!KIsJfqSQ|5b>R0UGUoUbI zdQJ0AJH2n$$*ZYv(y@ZZ6T9MOHWW3_e+R-OH_Z2!G(xplwVWx5)A~pqyd3QKnW4-WI*POd`t!y547Q3 z_=q;@4zA`y2(G9z%|EU@JcE)d+&@;C|fl7^-9$+sh`R9%?)=Npq>@u{h~OX&*@Z&`>R1DQ}57j zmep`B^O*f`i&Gb=Lj0|HK($F(E_?X>AOCK7&Rh33QOx>&fWVNb(6 z9M#n3NIhgSa?^C5`77vexr_Fbu0ke0tzLnCxB6R8eYwAID}cUQ;}#mJ_fAB&h;OK^ zudCb!GGl^|vRHhf5=&2nO*!)xcQ+WD8|34mmYE`*m1TpW(8Vjg@n^bave4?R#71I_lPK3U4~Z6?LN91ZtNA!K!J z!&PZ7a$3I{Xu1B(-d_=yErU}Tobf@H=F~CHHjnM8Co=G!iX6ADi8>?hHfx?zFMa7d zFzM(GCJ6NO^qiBQo)XhGqnE&VLHZ~=2pF<(5RsSwz~_N08Fzc6MNw>hcL{+LEt0Ok zg?EiS^etFVz>8K_fi*j->XevKzpKZiiLLL|hgObHzMk#4!0no9M3-xePS?}*l~=pI zy*-t0TH~YKPt)#ZLX4tp&brtm^BZ%7!$;HL6&zas-J9eNvD?er*^q& z(|9K^8TI(eQ5Jk0Y##Y~d<$JXH~TQn8$oToD8jl8+b_Pbb7XpKR=g5*XF57d;psUa 
zq1Ba={w(Y9Z!>Le?u7-{G3iNEz+~FkZ1(bhuUKC9XcFmZdGL~|zKaDZ^-EAk{!aD2}2v>@jKRvTQL7wX(B z_ezU%o7}cK+${TtpcV2gP_e+JC8@Ke!YG>d5_VheU{Eyvl4Si7I0!g+I^y@VjbVs7M^a!(`(2gbB** zT>WT=+c(La-TsYK(jN-Ic8{yoSj65eq9f&9$U)l+_%N1-Q|>9kTh+sY9M@<`)$byylVPQ@}ynt7ZEaA!=V2PW9YH z>`RD!I2)@1mtXwWRbe@E=t^SfZz1VXp3nyH7Ya%RW0$av!;$b~7QDS;HmtDMA=vD_ z3}pT9SEz%gwkq*eVWx+Y`Pa$5^pUUD$tY-A6tXPd3N|wuHh9$Z=@#L%9`s}pGfrTw zv>G06aJuFlj01}rJ>6NaRm2>4b|qC7`NxAide}*E^ zQn~~-euYfrvuqMTwmuNOK&oCPaHrS#u};_qf6d0qn0eF=}6wEVzw; zed@ED9qaQG+FMmY5jXYXDMEy%6VIOOvi$>k0H;NG951V~NbcRW)qDCsH2F_>+Tf!e zuY7FTmur#S9zNQF?EtC)vG0od|540X@Ae};7_}}7ZD{A5s_qHlKdIz%j2N^Jj3re~a07y98%UQe z$syTj)TF-*>a(Ru7jVLg}38c!XMj?%%lvN5cL&$oVNfI?msl1i9+{SN>Gbcz}6kNN`r`XW;CSXov zuCV5O;YdHKR?%!gz1LZzUdtgK;qVaaYcD)Tlj-+wq>}dpxgK=-gt5&oXjU}ww1fk0 zS6@%#2pzi{?{ySVKT1VH@RT~w*FV1+b;ck=;vH#6(on1U_Or8JXwtZ^0_Rj_MQP}bccIs}GLoH8Z zpp{Oex>O|lyTI4An{MttFmfgya`#6NNjB`Zu5qk^1J;?kcYMDhq;uaHhq%hA;#^5U zo_Ry^F6k(+Ts5$3&14NZf9aE|QZAAFN%BwN+V}PnqO)9SFmFA;iugj6(Zkl3hqhpR zUSaxHL0thRQpg*xwUAC!%F=70+@4bKCihQFc!AC)jv>QY7ed^u&-!P*DpM&oeJ9xO zrv9FfyY=Ns(;@6AK}*QA>G=#K8F)Nqdj*YaXSAMnE|v1$s4bEy^b?B@4#g&N7Ji}t z;;$5H6#T!v`75u_#0&7>!IK4V9mrC&7Rh$J@@)cXU(D*l;O*1yZ=C^z-iK1|Dj-Mu zZ8w2I{6Oogr8k(fu(8HhQuIeq9VJ*wNV_q8l8%j_^F9{a@&o_y=5bbB3tyYXw8sJT z;k|zG$QIw1r|>0xyStLj_*Ojc5s5*7on)HCtCusMb0Npix4zO}8^w0TarWC???F=T z@|#Fhj@UWs$ubB+T`dc*x_TCK6bcJltlhQM?{8}AC+)Ly;?{+Zumd@kw>nwywSD4P z=y*_z+w^JWhaNk*0$$F9%d(H)u*CA3RRV&86bij&$7_iO9PbxL5pT(0^r_LX>}JkAt#e7+pH>7w$)xCEbdVi~=8&%m!TsWxBBJol_rsi>5X{k^NHl1v zQqHlE^~O5adL@<;Z;ozxm|PqFrP-5tAnuW3#l~xN--Ybk9{WaHY62e(z~zuKG7GmP zG=|6L&t^Dp2;=qM&i$&3V_NW9U$$b$6n{jACAYrE)V@)thKY*EIftJGHbb5I598k; zbSHvHvqt~YvGd_em_C&aI;Bn9iFWOmn$TLs-r6T>3?B}`D>;hFap9<`l3n%Kw&t_(*1Y4Ebq{5jS2ft4^*q8paFNE8xLG_#3%%QFmKJIOw zqbpb=FMcKOj!%hOSxQN(6@q$+w>fFjtKyK88^RJ3tSE89n3QT=yF1q={c2$E`UD(BSNt@e+ zk$dC`hfbC$lecdll&EtRc=t|*d?ooEYdbQeM%^ney6MYIkt3)2j-4u}XuSGVK8TJh0;z>;ga@#JLg% zp%~{?-H?^FR^lA83}iYj=uNEd}j^)GR{BY)Bu*lu+uuiceW 
ztWJZ!BBY(%&av7~jn6%m$&9q?gA!Y4Lev7?K2d+ocz;8HWJrXI{LO1H>^y1@(WFT+ z#7KgF@U{Pw4%DGv%a7VW;pFvxqlS|+yl6kiR75HQr6`}=_od-P2w=TxcWzL$Z>gJy z7@FE~#<}jL8e{qdX_$^t8@NakYjDCpXpJmeIl(SJR`dT%#5`x5^Z zceErVv&WjKh9|8~3aCKs@cc!8jo3h?t!G`+)|yQ%4PR#hlLXJLh}`H-@Gr;?r(TsmdTSS+n6-VF4+_#qP`TuyEk8_=Z?=Mn~I_f{?ymTVa0 zBA14P^;Gn&Lc?=QPn8yw({N~vgao!Qu*bLGHP~Z{(uy!f1JQ?SloY<*y(VTqWPBJ> zTNx1Sn8}#1MoG%yZL$SzgevRC8I6beyk^CWkRIQJq*ffrVopUf2#cd9gM^Z&OMr5O zyRiovNSbdL)~jubA_`NFvf=!V68~#sGa$FyJPIK?o(feo;j{DnW3O7yJowky2lKC- z+oeI8))`!Cx?K~?FR~qX8B{{LxHMy?Fe1O#S>th5P+a~WyLRG!r*ntJ#N87-6wDxzQF*S_p}9PwvzNJk_-r^qIF*}TOm{Oou|b^T*-3K2s!bu zWMGSLx_Wd9{T0(E6)Y%A7~pTTvM;%k^0FG=4lLAAB+e>iN(eZr902iJ?z7qzOm{ij z-G+E88w9L7v!7NFeR=Sv@P`-uKYQ!CfX2auN`Vci3FxqOJtML~2M3A+f)J_h%ws*J;=oduOio-ZN|)ruvn@(R0Aea)yixHfR4 zeMjZnMqIA0nOKys3)Fl4#m3bb71y5d6;!<(z-rkG9Pq77I3;6CzQHgx#M8TjRZ>ZN zlJre+N9SGf4#eE>dH~9lz4tP7xr%0i9LO4xW=MZ@PudjdZtUM7xApHL`_}7k}zzr5g PNJwx7)1qp#{iFIHVRL6s delta 6036 zcmZvAbx_>R((VEY?hxGF7M9=^NN|S`AZTz~+-><`L4pKWJh%jRUo5Z$2^I+ME&&3; zCBT>WoO|y%_0{cvrn;W#?wXmN?x#ArUS?F9ft-p)xl}$Y--Xkh;Oc>N7OmJmTy|ek z*{~_==>Bjl?u)GxdvUemQ9a&$h|mjQGS-^m^YpaYr`qS%`w`Ue9WZ*PPzrsu(bW$= z0ES)xY_A^Oe;@Ge8uioDQv^(b^2l*I6WycKb>86Z*NR%A?VDSV`bM&<-Sm^9vA2@p zG1f!X7wZEXkd3pp;pw@dJu^_l#8_QRMSrm{*GUVkb9q?|H<^QSoi*_h`hlL(w<4fE zG^et_92u{wij86QanT?<#zWfEM%qYefc_!zX&6itEA<7<^uz~&o|#kJywQyF$m>LF zrgPq>?H|QN@rH!N{0hn=6!n%KS-2mgw!1?a4@xEom7tRgICqpS35!}MW%ZwYV)Zb0X+kZwDvd^&Ge#5B7E`gT|0*AGro@70Csp_ zFox}QkX3*KJ0`O|>$+5`)sR1y_|m{dL*}R2G(5ETP?4?iM0eDFRM^W($OczG!3Nc^ z0=1untyb_h*OE$nfVt(rPLR$rA1=n4TA*$@fcN0C>Ti${Z7yr>s;Db<*j;60wFuY7!*!0iO|-1 z5nDhbDw-lo7a$ssWzZYRx?*Aa$>lIi+W$hD0#m=7i!Iq&WBJeBF>5I9>9tuBbV%qL zmH|V{U)o?#P!IO3T4_m9B~*CZ^~8D0*VKa2+=e{D=nEm4w^{2Tb|nko5#~twCm7O7 zuaEn&ejchF!s|&Mer3LoeT>gN_(9RIo7h)*nc*Uo^n?~C0`+;u4(8bTk6vsOf}5YB zn7Ure5-QCbx=jw{N5AafHGreUV&Qy7^324On zo$A^~dwhNgh+mU#u^g^|BT_xJEAiIe$_fR$P;eA3IK-y1L#?3`hT;4}=YU;@m(rMj zp~~UjHM=zO8-G#Oz)yxC15GwTg4NI>Zr1{THEL$7`NR*|hg%|yCG6?JsES*L%4YJ_ 
z=G*bMt>#wL`8bYg&NjQI3ATPe%ecynxE}uA+}77?*L%*?HL*5eqt!oL^(>-Nhet-s zL*~mBczy~;XBg#!mT3a%2Oeg1*z8Gt?{Cy(ut$wAYenxnpk61U9Q+0v%6hF?ivE1m zQi$xXWgzX&>CU3_%rp>Q)^XhZ&=S`|2vgNqGG5AVb-THD@bcPNak(k);L}*nGA=f7 zzkS%+VY{(Rc?=>F?fo+lo~hF_EjlzMIk>gK*Wik+gbMVZW-21kK_FX35QySGWx>PF z+S=2D?_Uf5KlLFCz8L=rLKtz(eL>5!ih+hgT#%q!?>Lp>uYKWrl+IWe%HNa?FYgM# zn4>9D(rqMOLy=sSM*dODjW1^kfav`cFAUk$L2df}zH*lG{x?jFLdSx(1_v zs+wyN~ZS6P?m{63tM z7rH>gwnG%btIEkRYf+NtE!8LUm3GEOq`uBJllbErsmP*<0HL8!)Y~BygbqfrT=dWZ zVkV-7>+p_3&N!j4hq=$gHx<~=S>@#9jBQ4E(j^(ZJA>VEs#21ZPr@Dv%tNCn=Z3w5 z_1|1~C52x1V%OBJOPCKmoeAO*&)1WxU8aFKP=Q8~MOVH_jy9*gvnunCJqMf)5U zzQQfOY;~My66ba%^l!0aVMEWOIX@Xb=*}gPCvPwPqq8JIjbDIiq|qjJ}GzQ z1i3QRBKWG&byqWz@QviKXM2Xpm%XjU*d~mqHF86jSF*FeEnDcU)`cnYbX-WU_g%wD z4>YPs_zgq6ROfp+i2#X%#@lHZf|Wcee8stS*UpW8HyYMWYP#lIdUTCyK2FN`LLBys z&_T`GNDu97$ic!51_<={_%~A?ACoi8;}>Lb-`9k?2fs?nc)}IhSi4Ra#dI;-Y^b+* zd8sZ*X4!4Pef@4b=-a2?bK?r_-H#p1FHHI9(~uAMv$@lETEO)5&xgBH@7#xr&I&!? zz_l&t@%QnCclXN5iwK9as_1ERAbUQIlCHejQ2o&@w;(riH0a)MuBJwfhJ+us_cHu8 zq>_C!(sh$4XFdPJX+voiVOiGu{qqvJ^XW{2yAL5mXtqRJk-4u=0%vR}@Cg(p*yQNC zCZeTSEj|%^3<3TP6T~BPZG20^dC?Vcv*cy=HO8+zCp82rJ^3LPYdokbJ}L)iABB*J zzB{rUCPpS!3JQK#G8C`I6^};B8LufzT2{Fk?T_!13Elj0V<|*qg2nG=&hi8*=}6I- z(Z`tbJP+cA8*ThcKbHskzD83Pnc)Y)kH$z~B;Ys848QLH1?S>ydbp)B;rr=R{yfSD% z{8E(ZogLtJ$&sx+8xTv6}&kD znE>xtEOARSe%momOo^95^loy!Z`k3&_AbL4#hqSu zF6Z)u<42{dOWQ8$`!2M%)B@$xxM5He7@#4@V;T|CSWU-f*sdQrHWZ6UG0(25h>S*e zYt*nQ^0i!EnnoUxOU2F%#S)8DHh5h?CR{!Kfm+Q?t11$rBMf_aEk0)eTKl=))on|k zGbGfz2i)h$r&H3)Tu)IQE@GjO7W+M}-vtO_=C3{q_89s|VyQQ;J66`B%#?ugS?a>p zu!^eanNuk;5rNm8O?@U#w3BnX(E}Sc3Lh{m`7o=S*$h`D&AbE(`{HJ>|IuPx+&Ik@ zrrUMht0!nC?NaSsTm=ORSC$}+YgD6h$k7;g%}t|dBbQ(9jR^`39XDhmOg71_bwp3o z28i%P_$E#LjcfZ%=*U$*0*hqRe>Lcv08c-fzXw~2N>8-^7%?Q5h9tt<5H(EKhB@;T zOl-T#g^9+K{o^D`;a||+(Fli|W$Aw@$n}EZ^{AN*iAH^!x$ zsE3#_53O?~{=nbY#va>tty|jg-g6x(1zw1tt9pvjOo$)hiPH#tTa3$H$o}glLPXK+ zZ?weAzkC0NUf>o`S=u}M|10Ao{{_(81!g~HUeo`-BJn2nC3de2u6zx#J4ixTEyj-KRD|K{10e)W2UYzOU?&7 
z6_0MIJevJuR??KLj>zHpXrv7&qyPp$VqK37`^$uDa6urvf44jjPd`U%kAM1HpT3F9 z4+tqtaYOoT(0G=Wx_t#pRCP@F6HI&AjcN6)nO^$D&3Mv2U{{>piERc?fru{-<^c<= z9`6)!{)0$FUFO&Es3N8@zNX+$IS8##50zfFS-~w_c&)3I|4j8HXBxQnx;>RQUhFgz zG3w_*#nff@g*r7c6pgj0P2RrJSL^xonpzpGHX1RNV%}J~%*3g*cc;U|soyI5cJPmn z42#e^@02vCf%fpMv1_|a9VF>y<4D0?b{4f%xF>w46zemaTpXpaeO31%v!AnPm1+P= zt6UoE_tKVs#nPEYcH;r+e5}xj_Z5hTCBze*4zd31IAFC=#ih@7MXg!yP7oN$`TgSd)rQ`FB!y5h#&G-KJpk( zB}d{tWKEk373i{FRwK`;vU?HXX7uqVPxK=HD4fJ4RfcI=+9@uRIyJ4YniLp`MYc@0TzHdgzx- zv|^R2eT?5$)7iNP8wd_G@3+5h{$)BVJcZ=VQ|(bfkX&A#JntF!9iVrT9WH*@`QlW< z$tdax{lM`Okj9mckYb=;nLhCgVPBxkSv#H`)<2u}BJ&pj#Ln2#cusdF+_&Z7726Ie zqxavtZ}|}>4_}w-J8@+tu-IY@zu--s_grqd#5X_r$VPkLJdbk3f8{57u__mGY@HUg zrvBM;XY_MgVw(KR)=Z7mS#7eoR>kp-uv->P#9}2gH9eRoP0dxvPKZOWIaCP(W5ImR z@iP8Va55M4&nLx3sly9N_)AF}Ns{QPfTFjf&eS4=UF3TbxSnOWqN$st@e2#iZy7BXsfkzo`ffj@;_rs83-qYuO=srBxN;1QwB*g(X*e+JGvK3Zz^(T z!TnjmQ>BxMweA58YWaN=*&`kg2Lj5bB?2^>;xY7(SHbN1{l&CxIHwEWN- zWy)7|?J^|l#74-Mo(`)tfr+*WB!p!}RB~xDhuVbvctv7laklmLCezV8f|?9KtX)i9 z%+}T$6SefJX1EsK>Eq*GKOQC2_r8{l`hoAW>vFN~b+MPnECCjvBQH^f!s2~gvn^0Votc=m}GoY$aFq@Wq0vZAODLD z(QOoy*%d>c{|I4yJc%nk?)$zQbr}V8Floo7Ls?R$kEbpVNHy57{{-rHyl;{R|DqBY zY}pEeG`slK=;#5c6kWqC_(sGbCYOmgCu`@oW)e{md=qGRJ>%*n^&*;%XWo~G1Bw$T>RD^;R?J|-sV=x1{ShbNoE zdrs0x7^1*c7-;bf%#!?lB84dIToi=MKxdU~m)U}va1~j$Abj+T7`#-=`^XSc2~pgu zs^{mq*9Ssw1`GloZJzI9*3zz>H$6YW?Baq4?e^kM%2L3ZkLNbqSj!v-*)_ozSf@MP z4#`)Nrmp7sSx=$LTHF+U3wj_PRT7aoCBzd3WL#?va=st2qodF!(Z#YI5%QIBy|)>w z{IQYfX9gk4f`*#bfPnc`!D~N`%_BLxkDM%f>wpK>4=!`Z&;YXof7kH2#tpIa^#C^2 zo~Mcp{)0L&GZply+0AoU1RMKY%pMVaJEFb3tYfX2P8gkOxe$EYfuWr1*V*-I%W~Lg-sRmHCXVGJ zTgWHQV8cO&H^~I39LEI?vnTZ1092xpE+DnPsSryRLvi#@}$^C6*AK!J& zPoxzjD<(wRBGW<@wHLnSD5Hmw;sY9kY-t4xQr(fz1Acg=uO7v}b28g9s_D#Yl(bq3 z+jfe!el*1GM^v+W&3&!Cf&D=Q&3WpYFfN+#%h-=lnq!x8>LSDhr;Np7I*AuBxsFpm z8pJ9_cxQ-B+h{mlbv^rX{7Kp-==M;;r0RixEgnzh*3fpF)tIN@nF<WM8|3_%?;1RrXsCcAs0@7#PTB<0hU=S$?9fbK8X9WFk`u~5eK%iJS zE*}HUf9MD3uhoBxAU#}(PX_gp6^_fs3a{bgLcL~#>> .groupby(..., group_keys=False)\n", + "\n", + "To adopt 
the future behavior and silence this warning, use \n", + "\n", + "\t>>> .groupby(..., group_keys=True)\n", + " previous_y_m_upt = (df.sort_values(sort_cols2)\n" + ] + } + ], + "source": [ + "df = produce_ntd_monthly_ridership_by_rtpa(YEAR, MONTH)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "32072e6c-73f8-4aaa-9340-e326e2a48942", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 95900 entries, 0 to 95899\n", + "Data columns (total 29 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 key 95900 non-null object \n", + " 1 ntd_id 95900 non-null object \n", + " 2 legacy_ntd_id 90420 non-null object \n", + " 3 agency 95900 non-null object \n", + " 4 reporter_type 95900 non-null object \n", + " 5 period_year_month 95900 non-null object \n", + " 6 period_year 95900 non-null int64 \n", + " 7 period_month 95900 non-null int64 \n", + " 8 uza_name 95900 non-null object \n", + " 9 primary_uza_code 95900 non-null object \n", + " 10 _3_mode 95900 non-null object \n", + " 11 mode 95900 non-null object \n", + " 12 mode_name 95900 non-null object \n", + " 13 service_type 95900 non-null object \n", + " 14 Status 95900 non-null object \n", + " 15 tos 95900 non-null object \n", + " 16 upt 52124 non-null float64 \n", + " 17 vrm 52282 non-null float64 \n", + " 18 vrh 52282 non-null float64 \n", + " 19 voms 52486 non-null float64 \n", + " 20 _dt 95900 non-null object \n", + " 21 execution_ts 95900 non-null datetime64[ns, UTC]\n", + " 22 RTPA 95900 non-null object \n", + " 23 _merge 95900 non-null category \n", + " 24 previous_y_m_upt 51915 non-null float64 \n", + " 25 change_1yr 48466 non-null float64 \n", + " 26 pct_change_1yr 48439 non-null float64 \n", + " 27 Mode_full 95900 non-null object \n", + " 28 TOS_full 95900 non-null object \n", + "dtypes: category(1), datetime64[ns, UTC](1), float64(7), int64(2), object(18)\n", + "memory usage: 21.3+ 
MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "904cf3b9-f2db-4b24-a8e2-bf93fdaf4966", + "metadata": {}, + "outputs": [], + "source": [ + "def save_rtpa_outputs(\n", + " df: pd.DataFrame, \n", + " year: int, \n", + " month: str,\n", + " upload_to_public: bool = False\n", + "):\n", + " \"\"\"\n", + " Export an excel for each RTPA, adds a READ ME tab, then writes into a folder.\n", + " Zip that folder. \n", + " Upload zipped file to GCS.\n", + " \"\"\"\n", + " col_dict ={\n", + " 'Uace Cd': \"UACE Code\",\n", + " 'Dt': \"Date\",\n", + " 'Ntd Id': \"NTD ID\",\n", + " 'Tos': \"Type of Service\",\n", + " 'Legacy Ntd Id': \"Legacy NTD ID\",\n", + " 'Upt': \"UPT\",\n", + " 'Vrm': \"VRM\",\n", + " 'Vrh': \"VRH\",\n", + " 'Voms': \"VOMS\",\n", + " 'Rtpa': \"RTPA\",\n", + " 'Previous Y M Upt': \"Previous Year/Month UPT\",\n", + " 'Change 1Yr': \"Change in 1 Year UPT\",\n", + " 'Pct Change 1Yr': \"Percent Change in 1 Year UPT\",\n", + " 'Tos Full': \"Type of Service Full Name\"\n", + "}\n", + " print(\"creating individual RTPA excel files\")\n", + " \n", + " for i in df[\"RTPA\"].unique():\n", + " \n", + " print(f\"creating excel file for: {i}\")\n", + " \n", + " # Filename should be snakecase\n", + " rtpa_snakecase = i.replace(' ', '_').lower()\n", + " \n", + " #insertng readme cover sheet, \n", + " cover_sheet = pd.read_excel(\"./cover_sheet_template.xlsx\", index_col = \"**NTD Monthly Ridership by RTPA**\")\n", + " cover_sheet.to_excel(\n", + " f\"./{year}_{month}/{rtpa_snakecase}.xlsx\", sheet_name = \"README\")\n", + "\n", + " rtpa_data =( df[df[\"RTPA\"] == i]\n", + " .sort_values(\"ntd_id\")\n", + " #got error from excel not recognizing timezone, made list to include dropping \"execution_ts\" column\n", + " .drop(columns = [\"_merge\",\"execution_ts\"])\n", + " #cleaning column names\n", + " .rename(columns=lambda x: x.replace(\"_\",\" \").title().strip())\n", + " #rename columns\n", + " 
.rename(columns=col_dict)\n", + " )\n", + " #column lists for aggregations\n", + " agency_cols = [\"ntd_id\", \"agency\", \"RTPA\"]\n", + " mode_cols = [\"mode\", \"RTPA\"]\n", + " tos_cols = [\"tos\", \"RTPA\"]\n", + "\n", + " # Creating aggregations\n", + " by_agency_long = sum_by_group((df[df[\"RTPA\"] == i]), agency_cols) \n", + " by_mode_long = sum_by_group((df[df[\"RTPA\"] == i]), mode_cols)\n", + " by_tos_long = sum_by_group((df[df[\"RTPA\"] == i]), tos_cols)\n", + " \n", + " #writing pages to excel fil\n", + " with pd.ExcelWriter(f\"./{year}_{month}/{rtpa_snakecase}.xlsx\", mode =\"a\") as writer:\n", + " rtpa_data.to_excel(writer, sheet_name = \"RTPA Ridership Data\", index=False)\n", + " by_agency_long.to_excel(writer, sheet_name = \"Aggregated by Agency\", index=False)\n", + " by_mode_long.to_excel(writer, sheet_name = \"Aggregated by Mode\", index=False)\n", + " by_tos_long.to_excel(writer, sheet_name = \"Aggregated by TOS\", index=False)\n", + " \n", + " print(\"zipping all excel files\")\n", + " \n", + " shutil.make_archive(f\"./{year}_{month}\", \"zip\", f\"{year}_{month}\")\n", + " \n", + " print(\"Zipped folder\")\n", + " \n", + " fs.upload(\n", + " f\"./{year}_{month}.zip\", \n", + " f\"{GCS_FILE_PATH}{year}_{month}.zip\"\n", + " )\n", + " \n", + " if upload_to_public:\n", + " fs.upload(\n", + " f\"./{year}_{month}.zip\",\n", + " f\"{PUBLIC_GCS}ntd_monthly_ridership/{year}_{month}.zip\"\n", + " )\n", + " \n", + " print(\"Uploaded to GCS\")\n", + " \n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "2eafa973-34c7-4cd3-ba96-3af84ab81453", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "creating individual RTPA excel files\n", + "creating excel file for: San Joaquin Council of Governments\n", + "creating excel file for: Orange County Transportation Authority\n", + "creating excel file for: Transportation Agency for Monterey County\n", + "creating excel file for: 
Riverside County Transportation Commission\n", + "creating excel file for: Metropolitan Transportation Commission\n", + "creating excel file for: San Diego Association of Governments\n", + "creating excel file for: San Bernardino County Transportation Authority\n", + "creating excel file for: Los Angeles County Metropolitan Transportation Authority\n", + "creating excel file for: Placer County Transportation Planning Agency\n", + "creating excel file for: Kings County Association of Governments\n", + "creating excel file for: Stanislaus Council of Governments\n", + "creating excel file for: Kern Council of Governments\n", + "creating excel file for: Santa Cruz County Transportation Commission\n", + "creating excel file for: Sacramento Area Council of Governments\n", + "creating excel file for: Santa Barbara County Association of Governments\n", + "creating excel file for: Fresno County Council of Governments\n", + "creating excel file for: Ventura County Transportation Commission\n", + "creating excel file for: San Luis Obispo Council of Governments\n", + "creating excel file for: Tulare County Association of Governments\n", + "creating excel file for: Shasta Regional Transportation Agency\n", + "creating excel file for: Butte County Association of Governments\n", + "creating excel file for: Merced County Association of Governments\n", + "creating excel file for: Imperial County Transportation Commission\n", + "creating excel file for: El Dorado County Transportation Commission\n", + "creating excel file for: Tahoe Regional Planning Agency\n", + "zipping all excel files\n", + "Zipped folder\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'fs' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[47], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m 
os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mYEAR\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mMONTH\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m \u001b[43msave_rtpa_outputs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mYEAR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mMONTH\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mupload_to_public\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[46], line 74\u001b[0m, in \u001b[0;36msave_rtpa_outputs\u001b[0;34m(df, year, month, upload_to_public)\u001b[0m\n\u001b[1;32m 70\u001b[0m shutil\u001b[38;5;241m.\u001b[39mmake_archive(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mzip\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mZipped folder\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 74\u001b[0m \u001b[43mfs\u001b[49m\u001b[38;5;241m.\u001b[39mupload(\n\u001b[1;32m 75\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 76\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mGCS_FILE_PATH\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 77\u001b[0m )\n\u001b[1;32m 79\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m upload_to_public:\n\u001b[1;32m 80\u001b[0m fs\u001b[38;5;241m.\u001b[39mupload(\n\u001b[1;32m 81\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 82\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mPUBLIC_GCS\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124mntd_monthly_ridership/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmonth\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 83\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'fs' is not defined" + ] + } + ], + "source": [ + "os.makedirs(f\"./{YEAR}_{MONTH}/\")\n", + "save_rtpa_outputs(df, YEAR, MONTH, upload_to_public = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "400de2ab-2704-4998-85f6-a8e886689ad3", + "metadata": {}, + 
"outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 639bd555186456f284cf4c11ee3bc7d85da38c5d Mon Sep 17 00:00:00 2001 From: csuyat-dot Date: Thu, 19 Dec 2024 21:43:42 +0000 Subject: [PATCH 17/19] moved draft function to main script --- ntd/explore_save_rtpa_outputs_update.ipynb | 2 +- ntd/monthly_ridership_by_rtpa.py | 50 ++++++++++++---------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/ntd/explore_save_rtpa_outputs_update.ipynb b/ntd/explore_save_rtpa_outputs_update.ipynb index ff8ed8aa0..a2a8cf423 100644 --- a/ntd/explore_save_rtpa_outputs_update.ipynb +++ b/ntd/explore_save_rtpa_outputs_update.ipynb @@ -168,7 +168,7 @@ " cover_sheet.to_excel(\n", " f\"./{year}_{month}/{rtpa_snakecase}.xlsx\", sheet_name = \"README\")\n", "\n", - " rtpa_data =( df[df[\"RTPA\"] == i]\n", + " rtpa_data =(df[df[\"RTPA\"] == i]\n", " .sort_values(\"ntd_id\")\n", " #got error from excel not recognizing timezone, made list to include dropping \"execution_ts\" column\n", " .drop(columns = [\"_merge\",\"execution_ts\"])\n", diff --git a/ntd/monthly_ridership_by_rtpa.py b/ntd/monthly_ridership_by_rtpa.py index 2998755ce..4fda7ba60 100644 --- a/ntd/monthly_ridership_by_rtpa.py +++ b/ntd/monthly_ridership_by_rtpa.py @@ -99,7 +99,7 @@ def save_rtpa_outputs( upload_to_public: bool = False ): """ - Export an excel for each RTPA, adds new tabs for: READ ME & agg by agency, tos and mode. then writes into a folder. + Export an excel for each RTPA, adds a READ ME tab, then writes into a folder. Zip that folder. Upload zipped file to GCS. 
""" @@ -119,46 +119,50 @@ def save_rtpa_outputs( 'Pct Change 1Yr': "Percent Change in 1 Year UPT", 'Tos Full': "Type of Service Full Name" } + print("creating individual RTPA excel files") for i in df["RTPA"].unique(): + + print(f"creating excel file for: {i}") + # Filename should be snakecase rtpa_snakecase = i.replace(' ', '_').lower() + + #insertng readme cover sheet, + cover_sheet = pd.read_excel("./cover_sheet_template.xlsx", index_col = "**NTD Monthly Ridership by RTPA**") + cover_sheet.to_excel( + f"./{year}_{month}/{rtpa_snakecase}.xlsx", sheet_name = "README") - (df[df["RTPA"] == i] + rtpa_data =(df[df["RTPA"] == i] .sort_values("ntd_id") #got error from excel not recognizing timezone, made list to include dropping "execution_ts" column - .drop(columns = [ - "_merge", - "execution_ts" - ]) + .drop(columns = ["_merge","execution_ts"]) #cleaning column names .rename(columns=lambda x: x.replace("_"," ").title().strip()) #rename columns .rename(columns=col_dict) - #updated to `to_excel`, added sheet_name - .to_excel( - f"./{year}_{month}/{rtpa_snakecase}.xlsx", sheet_name = "RTPA Ridership Data", - index = False) - - ) - #insertng readme cover sheet, - cover_sheet = pd.read_excel("./cover_sheet_template.xlsx", index_col = "NTD Monthly Ridership by RTPA") - + ) + #column lists for aggregations agency_cols = ["ntd_id", "agency", "RTPA"] mode_cols = ["mode", "RTPA"] tos_cols = ["tos", "RTPA"] - by_agency_long = sum_by_group(df, agency_cols) - by_mode_long = sum_by_group(df, mode_cols) - by_tos_long = sum_by_group(df, tos_cols) + # Creating aggregations + by_agency_long = sum_by_group((df[df["RTPA"] == i]), agency_cols) + by_mode_long = sum_by_group((df[df["RTPA"] == i]), mode_cols) + by_tos_long = sum_by_group((df[df["RTPA"] == i]), tos_cols) + #writing pages to excel fil with pd.ExcelWriter(f"./{year}_{month}/{rtpa_snakecase}.xlsx", mode ="a") as writer: - cover_sheet.to_excel(writer, sheet_name = "READ ME") - by_agency_long.to_excel(writer, sheet_name = 
"Aggregated by Agency") - by_mode_long.to_excel(writer, sheet_name = "Aggregated by Mode") - by_tos_long.to_excel(writer, sheet_name = "Aggregated by TOS") - + rtpa_data.to_excel(writer, sheet_name = "RTPA Ridership Data", index=False) + by_agency_long.to_excel(writer, sheet_name = "Aggregated by Agency", index=False) + by_mode_long.to_excel(writer, sheet_name = "Aggregated by Mode", index=False) + by_tos_long.to_excel(writer, sheet_name = "Aggregated by TOS", index=False) + + print("zipping all excel files") + shutil.make_archive(f"./{year}_{month}", "zip", f"{year}_{month}") + print("Zipped folder") fs.upload( From d9b2094915a4fe4932c126f1fed1fa200cde615c Mon Sep 17 00:00:00 2001 From: csuyat-dot Date: Thu, 19 Dec 2024 23:00:40 +0000 Subject: [PATCH 18/19] updated readme to follow the template readme in the portfolio directory --- ntd/README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/ntd/README.md b/ntd/README.md index 873d76444..cca2300da 100644 --- a/ntd/README.md +++ b/ntd/README.md @@ -1,10 +1,57 @@ -# NTD Monthly Ridership by RTPA +# Monthly NTD Ridership by RTPA -Provide CalSTA with NTD Monthly Ridership by each regional transportation planning authority (RTPA). +Provide CalSTA with NTD Monthly Ridership by each RTPA. + +Per the [SB125 Final Guildelines](https://calsta.ca.gov/-/media/calsta-media/documents/sb125-final-guidelines-a11y.pdf) +>Caltrans will provide all RTPAs with a summary report each month that meets the requirements of this statutory provision, drawn from the data reported to the National Transit Database. The data will be drawn from the NTD at: [Complete Monthly Ridership (with adjustments and estimates) | FTA (dot.gov)](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release). 
RTPAs are required to post a link to this report and data in a manner easily accessed by the public, so that ridership trends within their region can be easily reviewed. This report shows general ridership trends by transit agency, mode, and type of service. Reported unlinked passenger trips are reported, as well as the change from the prior year. For example, July 2023's change would be the change in July 2023's reported values against July 2022's reported values. 
 
-## Datasets
-1. NTD monthly data: https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release.
-2. [RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true)
-3. Download our processed full data [here](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis).
\ No newline at end of file
+## Definitions
+- **FTA**: Federal Transit Administration.
+- **NTD**: National Transit Database. A reporting system that collects public transportation financial and operating information.
+- **RTPA**: Regional Transportation Planning Authority.
+- **UZA**: Urbanized Areas. An urbanized area is an incorporated area with a population of 50,000 or more that is designated as such by the U.S. Department of Commerce, Bureau of the Census.
+- **MODE**: A system for carrying transit passengers described by specific right-of-way (ROW), technology and operational features. Examples: Bus, Cable Car, Light Rail.
+- **TOS**: Describes how public transportation services are provided by the transit agency: directly operated (DO) or purchased transportation (PT) services.
+
+## Methodology
+Ridership data is ingested via the `FTA Complete Monthly Ridership` report, per the SB125 guidelines. Then filtered for agencies residing in California UZAs. These California Agencies are grouped by RTPAs, then aggregated by agencies, mode and TOS. 
The processed data for each RTPA is saved to a public repository, see datasets below. 
+
+
+## Frequently Asked Questions
+**Q: Which agencies/transit operators are in this report? Why are some agencies missing from an RTPA?**
+
+Per the [NTD Complete Monthly Ridership Report](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release) webpage:
+>File Summary: Contains monthly-updated service information reported by urban Full Reporters.
+
+Urban full reporters, that submit monthly ridership data to NTD, are included in this report. This report tracks data from 2018 to present. If an agency is not a monthly reporter, or has not reported data since 2018, they will not appear in the report.
+
+
+**Q: Where can I download my RTPA's data?**
+
+Data from this report can be downloaded from the Cal-ITP public data repository, see `Fully Processed Data Download` below. A Google Account is required to access the repository. Once logged in, navigate to `ntd_monthly_ridership`, click the year-month you want to download, then click `download`.
+
+The data is a zipped folder of all RTPA data for the year-month.
+
+
+**Q: How can my RTPA/Agency meet the requirements of the SB125 Guidelines regarding how "to make publicly available a summary of ridership data"**
+
+Per the [SB125 Final Guidelines](https://calsta.ca.gov/-/media/calsta-media/documents/sb125-final-guidelines-a11y.pdf):
+>RTPAs are required to post a link to this report and data in a manner easily accessed by the public, so that ridership trends within their region can be easily reviewed
+
+Hyperlinking this report on your RTPA's/Agency's webpage is a common method of meeting this requirement. 
+ +## Datasets / Data Sources +- [NTD Complete Monthly Ridership Report](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release) +- [California RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true) +- [Fully Processed Data Download](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis) + + + +## Who We Are +This website was created by the [California Department of Transportation](https://dot.ca.gov/)'s Division of Data and Digital Services. We are a group of data analysts and scientists who analyze transportation data, such as General Transit Feed Specification (GTFS) data, or data from funding programs such as the Active Transportation Program. Our goal is to transform messy and indecipherable original datasets into usable, customer-friendly products to better the transportation landscape. For more of our work, visit our [portfolio](https://analysis.calitp.org/). + +Alt text Alt text + +
Caltrans®, the California Department of Transportation® and the Caltrans logo are registered service marks of the California Department of Transportation and may not be copied, distributed, displayed, reproduced or transmitted in any form without prior written permission from the California Department of Transportation. \ No newline at end of file From 9672baac18ba4a2196386d1418f6b9d8c64fcd0e Mon Sep 17 00:00:00 2001 From: csuyat-dot Date: Thu, 19 Dec 2024 23:44:13 +0000 Subject: [PATCH 19/19] redeployed NBs and site. no errors --- portfolio/ntd_monthly_ridership/README.md | 59 +++++++++++++++++-- ...ine-county-transportation-commission.ipynb | 4 +- ...te-county-association-of-governments.ipynb | 4 +- ...ado-county-transportation-commission.ipynb | 4 +- ...fresno-county-council-of-governments.ipynb | 4 +- ...ial-county-transportation-commission.ipynb | 2 +- ...rt__rtpa_kern-council-of-governments.ipynb | 2 +- ...gs-county-association-of-governments.ipynb | 4 +- ...etropolitan-transportation-authority.ipynb | 4 +- ...ed-county-association-of-governments.ipynb | 4 +- ...tropolitan-transportation-commission.ipynb | 4 +- ...ange-county-transportation-authority.ipynb | 4 +- ...ounty-transportation-planning-agency.ipynb | 4 +- ...ide-county-transportation-commission.ipynb | 4 +- ...cramento-area-council-of-governments.ipynb | 4 +- ...dino-county-transportation-authority.ipynb | 4 +- ...san-diego-association-of-governments.ipynb | 4 +- ...a_san-joaquin-council-of-governments.ipynb | 4 +- ...n-luis-obispo-council-of-governments.ipynb | 2 +- ...ra-county-association-of-governments.ipynb | 4 +- ...ruz-county-transportation-commission.ipynb | 4 +- ...hasta-regional-transportation-agency.ipynb | 4 +- ...pa_stanislaus-council-of-governments.ipynb | 4 +- ..._rtpa_tahoe-regional-planning-agency.ipynb | 4 +- ...portation-agency-for-monterey-county.ipynb | 4 +- ...re-county-association-of-governments.ipynb | 4 +- ...ura-county-transportation-commission.ipynb | 4 +- 27 files 
changed, 102 insertions(+), 55 deletions(-) diff --git a/portfolio/ntd_monthly_ridership/README.md b/portfolio/ntd_monthly_ridership/README.md index 873d76444..cca2300da 100644 --- a/portfolio/ntd_monthly_ridership/README.md +++ b/portfolio/ntd_monthly_ridership/README.md @@ -1,10 +1,57 @@ -# NTD Monthly Ridership by RTPA +# Monthly NTD Ridership by RTPA -Provide CalSTA with NTD Monthly Ridership by each regional transportation planning authority (RTPA). +Provide CalSTA with NTD Monthly Ridership by each RTPA. + +Per the [SB125 Final Guildelines](https://calsta.ca.gov/-/media/calsta-media/documents/sb125-final-guidelines-a11y.pdf) +>Caltrans will provide all RTPAs with a summary report each month that meets the requirements of this statutory provision, drawn from the data reported to the National Transit Database. The data will be drawn from the NTD at: [Complete Monthly Ridership (with adjustments and estimates) | FTA (dot.gov)](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release). RTPAs are required to post a link to this report and data in a manner easily accessed by the public, so that ridership trends within their region can be easily reviewed. This report shows general ridership trends by transit agency, mode, and type of service. Reported unlinked passenger trips are reported, as well as the change from the prior year. For example, July 2023's change would be the change in July 2023's reported values against July 2022's reported values. -## Datasets -1. NTD monthly data: https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release. -2. [RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true) -3. Download our processed full data [here](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis). \ No newline at end of file +## Definitions +- **FTA**: Federal Transit Admisistration. 
+- **NTD**: National Transit Database. A reporting system that collects public transportation financial and operating information.
+- **RTPA**: Regional Transportation Planning Authority.
+- **UZA**: Urbanized Areas. An urbanized area is an incorporated area with a population of 50,000 or more that is designated as such by the U.S. Department of Commerce, Bureau of the Census.
+- **MODE**: A system for carrying transit passengers described by specific right-of-way (ROW), technology and operational features. Examples: Bus, Cable Car, Light Rail.
+- **TOS**: Describes how public transportation services are provided by the transit agency: directly operated (DO) or purchased transportation (PT) services.
+
+## Methodology
+Ridership data is ingested via the `FTA Complete Monthly Ridership` report, per the SB125 guidelines. Then filtered for agencies residing in California UZAs. These California Agencies are grouped by RTPAs, then aggregated by agencies, mode and TOS. The processed data for each RTPA is saved to a public repository, see datasets below.
+
+
+## Frequently Asked Questions
+**Q: Which agencies/transit operators are in this report? Why are some agencies missing from an RTPA?**
+
+Per the [NTD Complete Monthly Ridership Report](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release) webpage:
+>File Summary: Contains monthly-updated service information reported by urban Full Reporters.
+
+Urban full reporters, that submit monthly ridership data to NTD, are included in this report. This report tracks data from 2018 to present. If an agency is not a monthly reporter, or has not reported data since 2018, they will not appear in the report.
+
+
+**Q: Where can I download my RTPA's data?**
+
+Data from this report can be downloaded from the Cal-ITP public data repository, see `Fully Processed Data Download` below. A Google Account is required to access the repository. 
Once logged in, navigate to `ntd_monthly_ridership`, click the year-month you want to download, then click `download`. 
+
+The data is a zipped folder of all RTPA data for the year-month.
+
+
+**Q: How can my RTPA/Agency meet the requirements of the SB125 Guidelines regarding how "to make publicly available a summary of ridership data"**
+
+Per the [SB125 Final Guidelines](https://calsta.ca.gov/-/media/calsta-media/documents/sb125-final-guidelines-a11y.pdf):
+>RTPAs are required to post a link to this report and data in a manner easily accessed by the public, so that ridership trends within their region can be easily reviewed
+
+Hyperlinking this report on your RTPA's/Agency's webpage is a common method of meeting this requirement. 
+
+## Datasets / Data Sources
+- [NTD Complete Monthly Ridership Report](https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release)
+- [California RTPA list](https://gis.data.ca.gov/datasets/CAEnergy::regional-transportation-planning-agencies/explore?appid=cf412a17daaa47bca93c6d6b7e77aff0&edit=true)
+- [Fully Processed Data Download](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis)
+
+
+
+## Who We Are
+This website was created by the [California Department of Transportation](https://dot.ca.gov/)'s Division of Data and Digital Services. We are a group of data analysts and scientists who analyze transportation data, such as General Transit Feed Specification (GTFS) data, or data from funding programs such as the Active Transportation Program. Our goal is to transform messy and indecipherable original datasets into usable, customer-friendly products to better the transportation landscape. For more of our work, visit our [portfolio](https://analysis.calitp.org/).
+
+Alt text Alt text
+
+
Caltrans®, the California Department of Transportation® and the Caltrans logo are registered service marks of the California Department of Transportation and may not be copied, distributed, displayed, reproduced or transmitted in any form without prior written permission from the California Department of Transportation. \ No newline at end of file diff --git a/portfolio/ntd_monthly_ridership/rtpa_alpine-county-transportation-commission/00__monthly_ridership_report__rtpa_alpine-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_alpine-county-transportation-commission/00__monthly_ridership_report__rtpa_alpine-county-transportation-commission.ipynb index 2b4de639b..a6f664169 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_alpine-county-transportation-commission/00__monthly_ridership_report__rtpa_alpine-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_alpine-county-transportation-commission/00__monthly_ridership_report__rtpa_alpine-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a276d75088f0b1f3a8c57564d0d8da6f1b73f5597b6bb6477bc2b6daea96d9d -size 285768 +oid sha256:8175c7ae1a1266faa3cbb4694d1b3c3359b4b96f020b490ad20de7a4366f541d +size 55296 diff --git a/portfolio/ntd_monthly_ridership/rtpa_butte-county-association-of-governments/00__monthly_ridership_report__rtpa_butte-county-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_butte-county-association-of-governments/00__monthly_ridership_report__rtpa_butte-county-association-of-governments.ipynb index a76c97328..fd30829e2 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_butte-county-association-of-governments/00__monthly_ridership_report__rtpa_butte-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_butte-county-association-of-governments/00__monthly_ridership_report__rtpa_butte-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:c64de10d540d6424a865b05a483d3e43a664219850ac6afe5c0f86e59fc0985c -size 242916 +oid sha256:4ef7ce7e8441cf35982421340594c5b039b7ae13185e3fb82ce6db49927de349 +size 242920 diff --git a/portfolio/ntd_monthly_ridership/rtpa_el-dorado-county-transportation-commission/00__monthly_ridership_report__rtpa_el-dorado-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_el-dorado-county-transportation-commission/00__monthly_ridership_report__rtpa_el-dorado-county-transportation-commission.ipynb index ce029e99b..5993b9591 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_el-dorado-county-transportation-commission/00__monthly_ridership_report__rtpa_el-dorado-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_el-dorado-county-transportation-commission/00__monthly_ridership_report__rtpa_el-dorado-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:997edb1a71b96bccf3f865ac2f530984c6859fb9e68576d6f499e8d6b6bced9a -size 172082 +oid sha256:125c31195abd45a8bae0e898a1549036fd1dc1ac5ba15d749bade6180580e87e +size 172080 diff --git a/portfolio/ntd_monthly_ridership/rtpa_fresno-county-council-of-governments/00__monthly_ridership_report__rtpa_fresno-county-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_fresno-county-council-of-governments/00__monthly_ridership_report__rtpa_fresno-county-council-of-governments.ipynb index 7b1a7e25d..a79e127fe 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_fresno-county-council-of-governments/00__monthly_ridership_report__rtpa_fresno-county-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_fresno-county-council-of-governments/00__monthly_ridership_report__rtpa_fresno-county-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d04a2d51ef33edd60ef8c2a171fb27fab896570169bc61094fbb1e89f36820e -size 289290 +oid 
sha256:b442779c799f869997a29d96ea59746a5ed9d6fc2c109e2ffbc158b84724a390 +size 289289 diff --git a/portfolio/ntd_monthly_ridership/rtpa_imperial-county-transportation-commission/00__monthly_ridership_report__rtpa_imperial-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_imperial-county-transportation-commission/00__monthly_ridership_report__rtpa_imperial-county-transportation-commission.ipynb index f36043669..46a66ea4c 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_imperial-county-transportation-commission/00__monthly_ridership_report__rtpa_imperial-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_imperial-county-transportation-commission/00__monthly_ridership_report__rtpa_imperial-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c67d2641700055fd75f1e84bdf3e5e086e5f714aa35444f82105b3bddabb184a +oid sha256:22c5c8c15f35b51bb9da84a3dfa5d14ddf1ab84b4f7166de83309f1d228b39d5 size 243104 diff --git a/portfolio/ntd_monthly_ridership/rtpa_kern-council-of-governments/00__monthly_ridership_report__rtpa_kern-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_kern-council-of-governments/00__monthly_ridership_report__rtpa_kern-council-of-governments.ipynb index 460b889a7..3afecec53 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_kern-council-of-governments/00__monthly_ridership_report__rtpa_kern-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_kern-council-of-governments/00__monthly_ridership_report__rtpa_kern-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8469d0c4ed4738b572f638df6a8e614e9b12c9d7dd881712d3e6e217d4ba9252 +oid sha256:3d19afd5aff65868aeb07b883c2dddcbbe6fec936b56dc1484157f5eb0987634 size 267362 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_kings-county-association-of-governments/00__monthly_ridership_report__rtpa_kings-county-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_kings-county-association-of-governments/00__monthly_ridership_report__rtpa_kings-county-association-of-governments.ipynb index e5f56c41b..5e7aa1817 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_kings-county-association-of-governments/00__monthly_ridership_report__rtpa_kings-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_kings-county-association-of-governments/00__monthly_ridership_report__rtpa_kings-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad663579ae80c120a055b8ceab62d7efed22e41f56e630a520edbd03a58968c6 -size 382975 +oid sha256:4ded7acfc9b0adfa8e7cea83637a38a84419a359c0df7dc72e44d123a4f999ba +size 382972 diff --git a/portfolio/ntd_monthly_ridership/rtpa_los-angeles-county-metropolitan-transportation-authority/00__monthly_ridership_report__rtpa_los-angeles-county-metropolitan-transportation-authority.ipynb b/portfolio/ntd_monthly_ridership/rtpa_los-angeles-county-metropolitan-transportation-authority/00__monthly_ridership_report__rtpa_los-angeles-county-metropolitan-transportation-authority.ipynb index d70bb1399..5cb66d9f7 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_los-angeles-county-metropolitan-transportation-authority/00__monthly_ridership_report__rtpa_los-angeles-county-metropolitan-transportation-authority.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_los-angeles-county-metropolitan-transportation-authority/00__monthly_ridership_report__rtpa_los-angeles-county-metropolitan-transportation-authority.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2293626268ed34b7057941856c7f40e7d5efcab44c95fd9ba1d44e2b8b26cfce -size 1703520 +oid sha256:0ac81fb9ffd69c04e78506cdb7e5eb233e8025078e8767be88fe031cd26fcfe7 +size 1703518 diff 
--git a/portfolio/ntd_monthly_ridership/rtpa_merced-county-association-of-governments/00__monthly_ridership_report__rtpa_merced-county-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_merced-county-association-of-governments/00__monthly_ridership_report__rtpa_merced-county-association-of-governments.ipynb index c2d9c4ea4..d76ced5d0 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_merced-county-association-of-governments/00__monthly_ridership_report__rtpa_merced-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_merced-county-association-of-governments/00__monthly_ridership_report__rtpa_merced-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ecb49c3bb8c480cad496c9fbfe64927c555a64a04017a1dbc00a470b2ce1e20 -size 266622 +oid sha256:80ba63a49c053301fce4678db22f3fa6610215eaa3c038855315af00065ad018 +size 266623 diff --git a/portfolio/ntd_monthly_ridership/rtpa_metropolitan-transportation-commission/00__monthly_ridership_report__rtpa_metropolitan-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_metropolitan-transportation-commission/00__monthly_ridership_report__rtpa_metropolitan-transportation-commission.ipynb index bdea3fc3a..11a59df9d 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_metropolitan-transportation-commission/00__monthly_ridership_report__rtpa_metropolitan-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_metropolitan-transportation-commission/00__monthly_ridership_report__rtpa_metropolitan-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1eea1099aaddb36720b4035df4e1044bd55da4e6da0959d3d4de95d86ff4efa6 -size 1876738 +oid sha256:f08909c7433739c7b25bc68b14738bdfffacf1164ce6e46306943652f4151dbc +size 1928922 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_orange-county-transportation-authority/00__monthly_ridership_report__rtpa_orange-county-transportation-authority.ipynb b/portfolio/ntd_monthly_ridership/rtpa_orange-county-transportation-authority/00__monthly_ridership_report__rtpa_orange-county-transportation-authority.ipynb index 866cb6e6c..552f021c1 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_orange-county-transportation-authority/00__monthly_ridership_report__rtpa_orange-county-transportation-authority.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_orange-county-transportation-authority/00__monthly_ridership_report__rtpa_orange-county-transportation-authority.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15c2b5782aeb17381411bf05925e3f0d05acce5e981ef54e7e164c3e226e96fd -size 471566 +oid sha256:4e1b96d3cdee2df61b841e4ead1e547ba304dde585331941db52c1ffd6de73e3 +size 471565 diff --git a/portfolio/ntd_monthly_ridership/rtpa_placer-county-transportation-planning-agency/00__monthly_ridership_report__rtpa_placer-county-transportation-planning-agency.ipynb b/portfolio/ntd_monthly_ridership/rtpa_placer-county-transportation-planning-agency/00__monthly_ridership_report__rtpa_placer-county-transportation-planning-agency.ipynb index 68e79c1f1..b5ddfbed1 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_placer-county-transportation-planning-agency/00__monthly_ridership_report__rtpa_placer-county-transportation-planning-agency.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_placer-county-transportation-planning-agency/00__monthly_ridership_report__rtpa_placer-county-transportation-planning-agency.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:325117e4d6203a1aff736e37b29b8aa6d646abedce2bca61220cf461f11d5a99 -size 397570 +oid sha256:99ee7d6b604390ba6612e474fdea68a5fe5b3b02a11ba5af143a4410c40253a6 +size 397572 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_riverside-county-transportation-commission/00__monthly_ridership_report__rtpa_riverside-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_riverside-county-transportation-commission/00__monthly_ridership_report__rtpa_riverside-county-transportation-commission.ipynb index a86a2a111..0396abc73 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_riverside-county-transportation-commission/00__monthly_ridership_report__rtpa_riverside-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_riverside-county-transportation-commission/00__monthly_ridership_report__rtpa_riverside-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab177dfee04294fa76f87e2825d6395f0fe792a343ad798ca5e55aa13489c050 -size 559223 +oid sha256:94e3e2b4b1c31edb03e18c286e7398279cfa12e7e4f326594c614fbd8625b334 +size 559224 diff --git a/portfolio/ntd_monthly_ridership/rtpa_sacramento-area-council-of-governments/00__monthly_ridership_report__rtpa_sacramento-area-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_sacramento-area-council-of-governments/00__monthly_ridership_report__rtpa_sacramento-area-council-of-governments.ipynb index 653fc6522..9acbaea84 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_sacramento-area-council-of-governments/00__monthly_ridership_report__rtpa_sacramento-area-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_sacramento-area-council-of-governments/00__monthly_ridership_report__rtpa_sacramento-area-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:430fae617c35b2e10aa026984352780668cd51bfe7b3bf5953c5c176da7d6512 -size 643895 +oid sha256:f523f790b54d2f0eb95136378e5f17e3e0350cccbe41155a612ca2ebb881270d +size 643894 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_san-bernardino-county-transportation-authority/00__monthly_ridership_report__rtpa_san-bernardino-county-transportation-authority.ipynb b/portfolio/ntd_monthly_ridership/rtpa_san-bernardino-county-transportation-authority/00__monthly_ridership_report__rtpa_san-bernardino-county-transportation-authority.ipynb index e26214f63..a5ea0bb3f 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_san-bernardino-county-transportation-authority/00__monthly_ridership_report__rtpa_san-bernardino-county-transportation-authority.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_san-bernardino-county-transportation-authority/00__monthly_ridership_report__rtpa_san-bernardino-county-transportation-authority.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68da547505174438aa25d03ab7709e3dcc624763246c9de49753cf480633ddac -size 481470 +oid sha256:aa8402e33d377313339ef39123f817064f6727c5917dc917f6db6abbf3eaed29 +size 481473 diff --git a/portfolio/ntd_monthly_ridership/rtpa_san-diego-association-of-governments/00__monthly_ridership_report__rtpa_san-diego-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_san-diego-association-of-governments/00__monthly_ridership_report__rtpa_san-diego-association-of-governments.ipynb index 8700f4413..5a979d937 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_san-diego-association-of-governments/00__monthly_ridership_report__rtpa_san-diego-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_san-diego-association-of-governments/00__monthly_ridership_report__rtpa_san-diego-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed1ed101ac668d27c68aa924eb2e893883ea8fbad59366dd0c78f9deb135c7a5 -size 651290 +oid sha256:37e0c8f69a928bb92f8bef5c8e7a1e31d8f71934e40866ece86845717b03ef88 +size 651287 diff --git 
a/portfolio/ntd_monthly_ridership/rtpa_san-joaquin-council-of-governments/00__monthly_ridership_report__rtpa_san-joaquin-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_san-joaquin-council-of-governments/00__monthly_ridership_report__rtpa_san-joaquin-council-of-governments.ipynb index 98a7ecedd..880d178e6 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_san-joaquin-council-of-governments/00__monthly_ridership_report__rtpa_san-joaquin-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_san-joaquin-council-of-governments/00__monthly_ridership_report__rtpa_san-joaquin-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3cf105fa7eea1644e9d0e8f7d6869eccf5d698394b333a8d8a7dc0154ff995b2 -size 547511 +oid sha256:e4d4ad2a2a36da5b7bd8919512aad52fc59568616c24b333b9005962fbcc686d +size 547513 diff --git a/portfolio/ntd_monthly_ridership/rtpa_san-luis-obispo-council-of-governments/00__monthly_ridership_report__rtpa_san-luis-obispo-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_san-luis-obispo-council-of-governments/00__monthly_ridership_report__rtpa_san-luis-obispo-council-of-governments.ipynb index bcaefe292..7dfd3bbb0 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_san-luis-obispo-council-of-governments/00__monthly_ridership_report__rtpa_san-luis-obispo-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_san-luis-obispo-council-of-governments/00__monthly_ridership_report__rtpa_san-luis-obispo-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19f18edc26b6f102f4d7a0476e9de5cfe5cef76f029c52e4abcac4c6c3ca7d49 +oid sha256:4cdbf6a4db5499ea74be91504bf7e4dce2a15d7ee56099ef75e2bb1e81acbe1a size 405964 diff --git a/portfolio/ntd_monthly_ridership/rtpa_santa-barbara-county-association-of-governments/00__monthly_ridership_report__rtpa_santa-barbara-county-association-of-governments.ipynb 
b/portfolio/ntd_monthly_ridership/rtpa_santa-barbara-county-association-of-governments/00__monthly_ridership_report__rtpa_santa-barbara-county-association-of-governments.ipynb index acc9a5999..0f24151dc 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_santa-barbara-county-association-of-governments/00__monthly_ridership_report__rtpa_santa-barbara-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_santa-barbara-county-association-of-governments/00__monthly_ridership_report__rtpa_santa-barbara-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3626a477ac7ed8f97c3da99e0abc7d329c1812700b34f6cb8123d935186850b8 -size 286678 +oid sha256:0b6e033cbb7178156a8a5b8cbf853a9861cbb8f45be22cb705e1980bce6ad1b1 +size 286675 diff --git a/portfolio/ntd_monthly_ridership/rtpa_santa-cruz-county-transportation-commission/00__monthly_ridership_report__rtpa_santa-cruz-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_santa-cruz-county-transportation-commission/00__monthly_ridership_report__rtpa_santa-cruz-county-transportation-commission.ipynb index 953124ea9..efbdaa020 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_santa-cruz-county-transportation-commission/00__monthly_ridership_report__rtpa_santa-cruz-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_santa-cruz-county-transportation-commission/00__monthly_ridership_report__rtpa_santa-cruz-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28fac554d1b57006129e31c4ac876c0d311b29f96f553999eac9ff875c5d64ee -size 334857 +oid sha256:ed53648322da839e84ac46a9bb126c2e38c803eb0e77248592b986aa144bb862 +size 334856 diff --git a/portfolio/ntd_monthly_ridership/rtpa_shasta-regional-transportation-agency/00__monthly_ridership_report__rtpa_shasta-regional-transportation-agency.ipynb 
b/portfolio/ntd_monthly_ridership/rtpa_shasta-regional-transportation-agency/00__monthly_ridership_report__rtpa_shasta-regional-transportation-agency.ipynb index f7921cbff..98c08f817 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_shasta-regional-transportation-agency/00__monthly_ridership_report__rtpa_shasta-regional-transportation-agency.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_shasta-regional-transportation-agency/00__monthly_ridership_report__rtpa_shasta-regional-transportation-agency.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:708194f890f7db0be8f385766fce790dc1df1f69b625d5ba5f11e68eb19853be -size 218791 +oid sha256:1938be6a9e1c55d85a6e47c402a23bc8bec3ffc492b3f6049a131ae9423d2f3d +size 218790 diff --git a/portfolio/ntd_monthly_ridership/rtpa_stanislaus-council-of-governments/00__monthly_ridership_report__rtpa_stanislaus-council-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_stanislaus-council-of-governments/00__monthly_ridership_report__rtpa_stanislaus-council-of-governments.ipynb index aa2332d31..14224425c 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_stanislaus-council-of-governments/00__monthly_ridership_report__rtpa_stanislaus-council-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_stanislaus-council-of-governments/00__monthly_ridership_report__rtpa_stanislaus-council-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d416c7eee7306eb1860e79bf7b4fa57c95f4a0195818b4d13fcf05ac2e105c9 -size 386786 +oid sha256:d10c4e41d8b71b1bc83086883babaaf0edb375796decc8eb889ea1e22bbfe090 +size 386790 diff --git a/portfolio/ntd_monthly_ridership/rtpa_tahoe-regional-planning-agency/00__monthly_ridership_report__rtpa_tahoe-regional-planning-agency.ipynb b/portfolio/ntd_monthly_ridership/rtpa_tahoe-regional-planning-agency/00__monthly_ridership_report__rtpa_tahoe-regional-planning-agency.ipynb index c8aecb5e3..2fe183320 100644 --- 
a/portfolio/ntd_monthly_ridership/rtpa_tahoe-regional-planning-agency/00__monthly_ridership_report__rtpa_tahoe-regional-planning-agency.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_tahoe-regional-planning-agency/00__monthly_ridership_report__rtpa_tahoe-regional-planning-agency.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78a415dbe91e6c22723cbcbed9884667ef4bc9e9f953c800c356362f95389e54 -size 312772 +oid sha256:7d4c94166e07bd335e3cb91eded6c37b5e73689e091b2d4f6af7971c25d644b4 +size 312770 diff --git a/portfolio/ntd_monthly_ridership/rtpa_transportation-agency-for-monterey-county/00__monthly_ridership_report__rtpa_transportation-agency-for-monterey-county.ipynb b/portfolio/ntd_monthly_ridership/rtpa_transportation-agency-for-monterey-county/00__monthly_ridership_report__rtpa_transportation-agency-for-monterey-county.ipynb index 5f7b51604..b82f8ff40 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_transportation-agency-for-monterey-county/00__monthly_ridership_report__rtpa_transportation-agency-for-monterey-county.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_transportation-agency-for-monterey-county/00__monthly_ridership_report__rtpa_transportation-agency-for-monterey-county.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2167e17f9e1ea68463222a639c1f17c1f0f919c30a65ea959deb3494343f9df -size 369190 +oid sha256:d5a30323d87bbc043f1076395f201295364ca0b04d126c8a4b20f66a4d196488 +size 369189 diff --git a/portfolio/ntd_monthly_ridership/rtpa_tulare-county-association-of-governments/00__monthly_ridership_report__rtpa_tulare-county-association-of-governments.ipynb b/portfolio/ntd_monthly_ridership/rtpa_tulare-county-association-of-governments/00__monthly_ridership_report__rtpa_tulare-county-association-of-governments.ipynb index e5f482332..208aaaf77 100644 --- 
a/portfolio/ntd_monthly_ridership/rtpa_tulare-county-association-of-governments/00__monthly_ridership_report__rtpa_tulare-county-association-of-governments.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_tulare-county-association-of-governments/00__monthly_ridership_report__rtpa_tulare-county-association-of-governments.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:243ab8491cb44017d0168405251d2b3ab336bd7298fcc4ab47c5572f3d9bbc93 -size 330267 +oid sha256:5628c9e6a539ef109bb0a825384a2869ab15a526451ac9b57c46b4e11afe6d30 +size 330266 diff --git a/portfolio/ntd_monthly_ridership/rtpa_ventura-county-transportation-commission/00__monthly_ridership_report__rtpa_ventura-county-transportation-commission.ipynb b/portfolio/ntd_monthly_ridership/rtpa_ventura-county-transportation-commission/00__monthly_ridership_report__rtpa_ventura-county-transportation-commission.ipynb index 0a2acf1f0..73f87fef9 100644 --- a/portfolio/ntd_monthly_ridership/rtpa_ventura-county-transportation-commission/00__monthly_ridership_report__rtpa_ventura-county-transportation-commission.ipynb +++ b/portfolio/ntd_monthly_ridership/rtpa_ventura-county-transportation-commission/00__monthly_ridership_report__rtpa_ventura-county-transportation-commission.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f79e1488c4f4f1f20b59a6ee40043509d544b8b2d6509ad5bcd4c244c06c6cf -size 384881 +oid sha256:98677c4ed823c08f9a9b195ef12043bad5d813f1ec6050951e4d4cf97bce1ab7 +size 384880