From 3d95e3604e77b18777a4900214b44e91d1c3e49f Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:26:22 +0000 Subject: [PATCH 01/10] add dummy arrival time because gtfs-segments updated --- rt_segment_speeds/scripts/cut_stop_segments.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rt_segment_speeds/scripts/cut_stop_segments.py b/rt_segment_speeds/scripts/cut_stop_segments.py index a5ef2d2f6..8a5a94475 100644 --- a/rt_segment_speeds/scripts/cut_stop_segments.py +++ b/rt_segment_speeds/scripts/cut_stop_segments.py @@ -64,6 +64,12 @@ def stop_times_with_shape( subset="geometry" ).reset_index(drop=True).set_geometry("geometry") + # Add a dummy arrival_time that is needed in gtfs_segments that is not NaT + # or else it'll throw error in gtfs_segments.create_segments. Use zero instead. + df = df.assign( + arrival_time = 0 + ) + return df From a8796d38b0179316648b13e5adb41a01806101bf Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 23:49:45 +0000 Subject: [PATCH 02/10] (refactor): nearest 5 vp and filter to 2 --- .../scripts/new_nearest_vp_and_filter.py | 133 +++++++++++++ .../segment_speed_utils/neighbor.py | 174 +++++++++++++++++- 2 files changed, 305 insertions(+), 2 deletions(-) create mode 100644 rt_segment_speeds/scripts/new_nearest_vp_and_filter.py diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py new file mode 100644 index 000000000..004f7e29d --- /dev/null +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -0,0 +1,133 @@ +import dask_geopandas as dg +import dask.dataframe as dd +import datetime +import geopandas as gpd +import numpy as np +import pandas as pd +import shapely +import sys + +from loguru import logger + +from shared_utils import rt_dates +from segment_speed_utils import helpers, neighbor +from update_vars import SEGMENT_GCS, SHARED_GCS, GTFS_DATA_DICT +from segment_speed_utils.project_vars import PROJECT_CRS + + +def stop_times_for_shape_segments( + analysis_date: str, + dict_inputs: dict +) -> gpd.GeoDataFrame: + """ + This is the stop times table using only 1 shape for each + route-direction. Every trip belong to that shape + will be cut along the same stops. + This allows us to aggregate segments across trips because each + segment has the same stop_id1 and stop_id2. + """ + SEGMENT_FILE = dict_inputs["segments_file"] + + rt_trips = helpers.import_unique_vp_trips(analysis_date) + + shape_stop_combinations = pd.read_parquet( + f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet", + columns = ["trip_instance_key", + "stop_id1", "stop_pair", + "st_trip_instance_key"], + filters = [[ + #("schedule_gtfs_dataset_key", "==", "7cc0cb1871dfd558f11a2885c145d144"), + ("trip_instance_key", "in", rt_trips) + ]] + ).rename(columns = {"stop_id1": "stop_id"}) + + subset_trips = shape_stop_combinations.st_trip_instance_key.unique() + + stops_to_use = helpers.import_scheduled_stop_times( + analysis_date, + columns = ["trip_instance_key", "shape_array_key", + "stop_sequence", "stop_id", "stop_pair", + "stop_primary_direction", "geometry"], + filters = [[("trip_instance_key", "in", subset_trips)]], + get_pandas = True, + with_direction = True + ).rename(columns = {"trip_instance_key": "st_trip_instance_key"}) + + stop_times = pd.merge( + stops_to_use, + shape_stop_combinations, + on = ["st_trip_instance_key", "stop_id", "stop_pair"], + how = "inner" + ).drop( + columns = "st_trip_instance_key" + ).drop_duplicates().reset_index(drop=True) + + return stop_times + + +def new_nearest_neighbor_for_stop( + analysis_date: str, + segment_type = segment_type, + config_path = GTFS_DATA_DICT +): + """ + """ + start = datetime.datetime.now() + + dict_inputs = config_path[segment_type] + trip_stop_cols = [*dict_inputs["trip_stop_cols"]] + EXPORT_FILE = dict_inputs["stage2c"] + + stop_times = stop_times_for_shape_segments( + analysis_date, + dict_inputs + ) + + gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) + + vp_before, vp_after = np.vectorize( + neighbor.new_subset_arrays_to_valid_directions + )( + gdf.vp_primary_direction, + gdf.vp_geometry, + gdf.vp_idx, + gdf.stop_geometry, + gdf.stop_primary_direction, + gdf.shape_geometry, + gdf.stop_meters + ) + + gdf2 = gdf.assign( + before_vp_idx = vp_before, + after_vp_idx = vp_after, + )[trip_stop_cols + [ + "shape_array_key", "stop_meters", "before_vp_idx", "after_vp_idx"] + ] + + del gdf, stop_times + + gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet") + + end = datetime.datetime.now() + logger.info(f"nearest neighbor for {segment_type} " + f"{analysis_date}: {end - start}") + + return + + +''' +if __name__ == "__main__": + + from segment_speed_utils.project_vars import analysis_date_list + from dask import delayed, compute + + delayed_dfs = [ + delayed(new_nearest_neighbor_for_stop)( + analysis_date = analysis_date, + segment_type = segment_type, + config_path = GTFS_DATA_DICT + ) for analysis_date in analysis_date_list + ] + + [compute(i)[0] for i in delayed_dfs] +''' \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index ffa197f93..065da1330 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -8,7 +8,7 @@ from calitp_data_analysis.geography_utils import WGS84 from segment_speed_utils import gtfs_schedule_wrangling, vp_transform -from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT +from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, PROJECT_CRS from shared_utils import geo_utils @@ -56,6 +56,50 @@ def merge_stop_vp_for_nearest_neighbor( return gdf +def new_merge_stop_vp_for_nearest_neighbor( + stop_times: gpd.GeoDataFrame, + analysis_date: str, + **kwargs +): + VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line + + vp_condensed = gpd.read_parquet( + f"{SEGMENT_GCS}{VP_NN}_{analysis_date}.parquet", + columns = ["trip_instance_key", + "vp_idx", "vp_primary_direction", + "geometry"], + **kwargs + ).to_crs(WGS84) + + shapes = helpers.import_scheduled_shapes( + analysis_date, + columns = ["shape_array_key", "geometry"], + crs = PROJECT_CRS, + get_pandas = True, + filters = [[("shape_array_key", "in", stop_times.shape_array_key.tolist())]] + ).rename(columns = {"geometry": "shape_geometry"}) + + gdf = pd.merge( + stop_times.rename( + columns = {"geometry": "stop_geometry"} + ).set_geometry("stop_geometry").to_crs(PROJECT_CRS), + vp_condensed.to_crs(PROJECT_CRS).rename( + columns = {"geometry": "vp_geometry"}), + on = "trip_instance_key", + how = "inner" + ).merge( + shapes, + on = "shape_array_key", + how = "inner" + ) + + gdf = gdf.assign( + stop_meters = gdf.shape_geometry.project(gdf.stop_geometry) + ) + + return gdf + + def subset_arrays_to_valid_directions( vp_direction_array: np.ndarray, vp_geometry: shapely.LineString, @@ -126,4 +170,130 @@ def add_nearest_neighbor_result_array( nearest_vp_arr = nearest_vp_arr_series ).drop(columns = ["vp_primary_direction", "vp_idx", "vp_geometry"]) - return gdf2 \ No newline at end of file + return gdf2 + + + +def find_nearest_points( + vp_coords_line: np.ndarray, + target_stop: shapely.Point, + vp_idx_array: np.ndarray, +) -> np.ndarray: + """ + vp_coords_line is all the vehicle positions strung together as + coordinates in a linestring. + The target point is a stop. + + We want to find the k nearest points before/after a stop. + Start with k=5. + Returns an array that gives the indices that are the nearest k points + (ex: nearest 5 vp to each stop). + """ + indices = geo_utils.nearest_snap( + vp_coords_line, + target_stop, + k_neighbors = 5 + ) + + # nearest neighbor returns self.N + # if there are no nearest neighbor results found + # if we want 10 nearest neighbors and 8th, 9th, 10th are all + # the same result, the 8th will have a result, then 9th and 10th will + # return the length of the array (which is out-of-bounds) + indices2 = indices[indices < vp_idx_array.size] + + return indices2 + + +def filter_to_nearest2_vp( + vp_coords_line: np.ndarray, + shape_geometry: shapely.LineString, + vp_idx_array: np.ndarray, + stop_meters: float, + indices_of_nearest: np.ndarray, +) -> tuple[np.ndarray]: + """ + Take the indices that are the nearest. + Filter the vp coords down and project those against the shape_geometry. + Calculate how close those nearest k vp are to a stop (as they travel along a shape). + + Filter down to the nearest 2 vp before and after a stop. + If there isn't one before or after, a value of -1 is returned. + """ + # Subset the array of vp coords and vp_idx_array with + # the indices that show the nearest k neighbors. + nearest_vp = vp_coords_line[indices_of_nearest] + nearest_vp_idx = vp_idx_array[indices_of_nearest] + + # Project these vp coords to shape geometry and see how far it is + # from the stop's position on the shape + nearest_vp_projected = np.asarray( + [shape_geometry.project(shapely.Point(i)) - stop_meters + for i in nearest_vp] + ) + + # Negative values are before the stop + # Positive values are vp after the stop + before_indices = np.where(nearest_vp_projected < 0)[0] + after_indices = np.where(nearest_vp_projected > 0)[0] + + # Grab the closest vp before a stop (-1 means array was empty) + if before_indices.size > 0: + before = nearest_vp_idx[before_indices][-1] + else: + before = -1 + + # Grab the closest vp after a stop (-1 means array was empty) + if after_indices.size > 0: + after = nearest_vp_idx[after_indices][0] + else: + after = -1 + + return before, after + + +def new_subset_arrays_to_valid_directions( + vp_direction_array: np.ndarray, + vp_geometry: shapely.LineString, + vp_idx_array: np.ndarray, + stop_geometry: shapely.Point, + stop_direction: str, + shape_geometry: shapely.LineString, + stop_meters: float +) -> np.ndarray: + """ + Each row stores several arrays related to vp. + vp_direction is an array, vp_idx is an array, + and the linestring of vp coords can be coerced into an array. + + When we're doing nearest neighbor search, we want to + first filter the full array down to valid vp + before snapping it. + """ + opposite_direction = vp_transform.OPPOSITE_DIRECTIONS[stop_direction] + + # These are the valid index values where opposite direction + # is excluded + valid_indices = (vp_direction_array != opposite_direction).nonzero() + + # These are vp coords where index values of opposite direction is excluded + valid_vp_coords_line = np.array(vp_geometry.coords)[valid_indices] + + # These are the subset of vp_idx values where opposite direction is excluded + valid_vp_idx_arr = np.asarray(vp_idx_array)[valid_indices] + + nearest_indices = find_nearest_points( + valid_vp_coords_line, + stop_geometry, + valid_vp_idx_arr, + ) + + before_vp, after_vp = filter_to_nearest2_vp( + valid_vp_coords_line, + shape_geometry, + valid_vp_idx_arr, + stop_meters, + nearest_indices, + ) + + return before_vp, after_vp \ No newline at end of file From 533c7392017d5ce061f5ab4407d7d11165633bb6 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 23:50:28 +0000 Subject: [PATCH 03/10] could stage2c replace stage2/stage2b outputs? --- _shared_utils/shared_utils/gtfs_analytics_data.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index 5391c75b8..96ffcb1b7 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -74,6 +74,7 @@ stop_segments: stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_shape_segments" stage2b: "nearest/nearest2_vp_shape_segments" + stage2c: "nearest/wide_nearest2_vp_shape_segments" stage3: "stop_arrivals" stage4: "speeds_stop_segments" trip_stop_cols: ["trip_instance_key", "stop_sequence"] From bc84b9ff8a5142c51235085186c5fb33ec3bca5c Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Sat, 14 Dec 2024 01:05:13 +0000 Subject: [PATCH 04/10] (refactor): add vp shape meters for nearest neighbor results --- .../scripts/new_nearest_vp_and_filter.py | 27 +++++++++++------ .../segment_speed_utils/neighbor.py | 30 +++++++++++-------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py index 004f7e29d..63c299718 100644 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -1,5 +1,3 @@ -import dask_geopandas as dg -import dask.dataframe as dd import datetime import geopandas as gpd import numpy as np @@ -36,7 +34,6 @@ def stop_times_for_shape_segments( "stop_id1", "stop_pair", "st_trip_instance_key"], filters = [[ - #("schedule_gtfs_dataset_key", "==", "7cc0cb1871dfd558f11a2885c145d144"), ("trip_instance_key", "in", rt_trips) ]] ).rename(columns = {"stop_id1": "stop_id"}) @@ -67,7 +64,7 @@ def stop_times_for_shape_segments( def new_nearest_neighbor_for_stop( analysis_date: str, - segment_type = segment_type, + segment_type: str, config_path = GTFS_DATA_DICT ): """ @@ -85,7 +82,7 @@ def new_nearest_neighbor_for_stop( gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - vp_before, vp_after = np.vectorize( + vp_before, vp_after, vp_before_m, vp_after_m = np.vectorize( neighbor.new_subset_arrays_to_valid_directions )( gdf.vp_primary_direction, @@ -100,8 +97,12 @@ def new_nearest_neighbor_for_stop( gdf2 = gdf.assign( before_vp_idx = vp_before, after_vp_idx = vp_after, + before_vp_meters = vp_before_m, + after_vp_meters = vp_after_m )[trip_stop_cols + [ - "shape_array_key", "stop_meters", "before_vp_idx", "after_vp_idx"] + "shape_array_key", "stop_meters", + "before_vp_idx", "after_vp_idx", + "before_vp_meters", "after_vp_meters"] ] del gdf, stop_times @@ -115,11 +116,20 @@ def new_nearest_neighbor_for_stop( return -''' + if __name__ == "__main__": - from segment_speed_utils.project_vars import analysis_date_list + #from segment_speed_utils.project_vars import analysis_date_list + from dask import delayed, compute + LOG_FILE = "../logs/test.log" + logger.add(LOG_FILE, retention="3 months") + logger.add(sys.stderr, + format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", + level="INFO") + + analysis_date_list = [rt_dates.DATES["sep2024"]] + segment_type = "stop_segments" delayed_dfs = [ delayed(new_nearest_neighbor_for_stop)( @@ -130,4 +140,3 @@ def new_nearest_neighbor_for_stop( ] [compute(i)[0] for i in delayed_dfs] -''' \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index 065da1330..f9c19ea2c 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -7,7 +7,7 @@ import shapely from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import gtfs_schedule_wrangling, vp_transform +from segment_speed_utils import gtfs_schedule_wrangling, helpers, vp_transform from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, PROJECT_CRS from shared_utils import geo_utils @@ -69,7 +69,7 @@ def new_merge_stop_vp_for_nearest_neighbor( "vp_idx", "vp_primary_direction", "geometry"], **kwargs - ).to_crs(WGS84) + ) shapes = helpers.import_scheduled_shapes( analysis_date, @@ -228,28 +228,32 @@ def filter_to_nearest2_vp( # Project these vp coords to shape geometry and see how far it is # from the stop's position on the shape nearest_vp_projected = np.asarray( - [shape_geometry.project(shapely.Point(i)) - stop_meters + [shape_geometry.project(shapely.Point(i)) for i in nearest_vp] ) # Negative values are before the stop # Positive values are vp after the stop - before_indices = np.where(nearest_vp_projected < 0)[0] - after_indices = np.where(nearest_vp_projected > 0)[0] + before_indices = np.where(nearest_vp_projected - stop_meters < 0)[0] + after_indices = np.where(nearest_vp_projected - stop_meters > 0)[0] # Grab the closest vp before a stop (-1 means array was empty) if before_indices.size > 0: - before = nearest_vp_idx[before_indices][-1] + before_idx = nearest_vp_idx[before_indices][-1] + before_vp_meters = nearest_vp_projected[before_indices][-1] else: - before = -1 + before_idx = -1 + before_vp_meters = 0 # Grab the closest vp after a stop (-1 means array was empty) if after_indices.size > 0: - after = nearest_vp_idx[after_indices][0] + after_idx = nearest_vp_idx[after_indices][0] + after_vp_meters = nearest_vp_projected[after_indices][0] else: - after = -1 + after_idx = -1 + after_vp_meters = 0 - return before, after + return before_idx, after_idx, before_vp_meters, after_vp_meters def new_subset_arrays_to_valid_directions( @@ -287,8 +291,8 @@ def new_subset_arrays_to_valid_directions( stop_geometry, valid_vp_idx_arr, ) - - before_vp, after_vp = filter_to_nearest2_vp( + + before_vp, after_vp, before_meters, after_meters = filter_to_nearest2_vp( valid_vp_coords_line, shape_geometry, valid_vp_idx_arr, @@ -296,4 +300,4 @@ def new_subset_arrays_to_valid_directions( nearest_indices, ) - return before_vp, after_vp \ No newline at end of file + return before_vp, after_vp, before_meters, after_meters \ No newline at end of file From 4a26425adffacd05f53dfc1af07b222eee139b67 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Tue, 17 Dec 2024 18:53:44 +0000 Subject: [PATCH 05/10] rename columns in nearest vp output --- .../scripts/new_nearest_vp_and_filter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py index 63c299718..9a9b21fff 100644 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py @@ -82,7 +82,7 @@ def new_nearest_neighbor_for_stop( gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - vp_before, vp_after, vp_before_m, vp_after_m = np.vectorize( + vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( neighbor.new_subset_arrays_to_valid_directions )( gdf.vp_primary_direction, @@ -95,14 +95,14 @@ def new_nearest_neighbor_for_stop( ) gdf2 = gdf.assign( - before_vp_idx = vp_before, - after_vp_idx = vp_after, - before_vp_meters = vp_before_m, - after_vp_meters = vp_after_m + prior_vp_idx = vp_before, + subseq_vp_idx = vp_after, + prior_vp_meters = vp_before_meters, + subseq_vp_meters = vp_after_meters )[trip_stop_cols + [ "shape_array_key", "stop_meters", - "before_vp_idx", "after_vp_idx", - "before_vp_meters", "after_vp_meters"] + "prior_vp_idx", "subseq_vp_idx", + "prior_vp_meters", "subseq_vp_meters"] ] del gdf, stop_times From 94496c2296f61a7f33ecb0ee88ce4fd182376f34 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Tue, 17 Dec 2024 18:54:24 +0000 Subject: [PATCH 06/10] (refactor): stop arrival interpolation is better set up now, remove intermediate steps --- .../scripts/interpolate_stop_arrival.py | 101 ++++++------------ 1 file changed, 30 insertions(+), 71 deletions(-) diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index a155b74c1..c8b5a50c5 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -1,4 +1,10 @@ """ +Interpolate stop arrival +based on where the nearest 2 vp are +when stop position is between the 2 vp. + +Stop and vp geometries should be projected along the shape geometry, +Use `stop_meters`, `prior_vp_meters`, `subseq_vp_meters`. """ import datetime import geopandas as gpd @@ -33,62 +39,6 @@ def get_vp_timestamps( return vp -def consolidate_surrounding_vp( - df: pd.DataFrame, - group_cols: list, -) -> pd.DataFrame: - """ - This reshapes the df to wide so that each stop position has - a prior and subseq timestamp (now called vp_timestamp_local). - """ - df = df.assign( - obs = (df.sort_values(group_cols + ["vp_idx"]) - .groupby(group_cols, - observed=True, group_keys=False, dropna=False) - .cumcount() - ) - ) - - group_cols2 = group_cols + ["stop_meters"] - prefix_cols = ["vp_idx", "shape_meters"] - timestamp_cols = ["location_timestamp_local", "moving_timestamp_local"] - # since shape_meters actually might be decreasing as time progresses, - # (bus moving back towards origin of shape) - # we don't actually know that the smaller shape_meters is the first timestamp - # nor the larger shape_meters is the second timestamp. - # all we know is that stop_meters (stop) falls between these 2 shape_meters. - # sort by timestamp, and set the order to be 0, 1 - vp_before_stop = df.loc[df.obs==0][group_cols2 + prefix_cols + timestamp_cols] - vp_after_stop = df.loc[df.obs==1][group_cols2 + prefix_cols + timestamp_cols] - - # For the vp before the stop occurs, we want the maximum timestamp - # of the last position - # We want to keep the moving_timestamp (which is after it's dwelled) - vp_before_stop = vp_before_stop.assign( - prior_vp_timestamp_local = vp_before_stop.moving_timestamp_local, - ).rename( - columns = {**{i: f"prior_{i}" for i in prefix_cols}} - ).drop(columns = timestamp_cols) - - # For the vp after the stop occurs, we want the minimum timestamp - # of that next position - # Keep location_timetamp (before it dwells) - vp_after_stop = vp_after_stop.assign( - subseq_vp_timestamp_local = vp_after_stop.location_timestamp_local, - ).rename( - columns = {**{i: f"subseq_{i}" for i in prefix_cols}} - ).drop(columns = timestamp_cols) - - df_wide = pd.merge( - vp_before_stop, - vp_after_stop, - on = group_cols2, - how = "inner" - ) - - return df_wide - - def add_arrival_time( nearest_vp_input_file: str, vp_timestamp_file: str, @@ -106,7 +56,11 @@ def add_arrival_time( f"{SEGMENT_GCS}{nearest_vp_input_file}_{analysis_date}.parquet" ) - subset_vp = vp_filtered.vp_idx.unique() + subset_vp = np.unique( + np.concatenate( + (vp_filtered.prior_vp_idx.unique(), + vp_filtered.subseq_vp_idx.unique()) + )).tolist() vp_timestamps = get_vp_timestamps( vp_timestamp_file, @@ -116,10 +70,14 @@ def add_arrival_time( df = pd.merge( vp_filtered, - vp_timestamps, - on = "vp_idx", + vp_timestamps.add_prefix("prior_"), + on = "prior_vp_idx", + how = "inner" + ).merge( + vp_timestamps.add_prefix("subseq_"), + on = "subseq_vp_idx", how = "inner" - ).pipe(consolidate_surrounding_vp, group_cols) + ) arrival_time_series = [] @@ -128,13 +86,13 @@ def add_arrival_time( stop_position = getattr(row, "stop_meters") projected_points = np.asarray([ - getattr(row, "prior_shape_meters"), - getattr(row, "subseq_shape_meters") + getattr(row, "prior_vp_meters"), + getattr(row, "subseq_vp_meters") ]) timestamp_arr = np.asarray([ - getattr(row, "prior_vp_timestamp_local"), - getattr(row, "subseq_vp_timestamp_local"), + getattr(row, "prior_moving_timestamp_local"), + getattr(row, "subseq_location_timestamp_local"), ]) @@ -215,12 +173,13 @@ def enforce_monotonicity_and_interpolate_across_stops( ) # Subset to trips that have at least 1 obs that violates monotonicity - trips_with_one_false = (df.groupby("trip_instance_key") - .agg({"arrival_time_sec_monotonic": "min"}) - .reset_index() - .query('arrival_time_sec_monotonic==0') - .trip_instance_key - ) + trips_with_one_false = ( + df.groupby("trip_instance_key") + .agg({"arrival_time_sec_monotonic": "min"}) + .reset_index() + .query('arrival_time_sec_monotonic==0') + .trip_instance_key + ) # Set arrival times to NaT if it's not monotonically increasing mask = df.arrival_time_sec_monotonic == False @@ -254,7 +213,7 @@ def interpolate_stop_arrivals( dict_inputs = config_path[segment_type] trip_stop_cols = [*dict_inputs["trip_stop_cols"]] USABLE_VP_FILE = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage2b"] + INPUT_FILE = dict_inputs["stage2c"] STOP_ARRIVALS_FILE = dict_inputs["stage3"] start = datetime.datetime.now() From 06770ca6f3781ab669e0a7c2d9a9778ae28f6d2d Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:09:11 +0000 Subject: [PATCH 07/10] (refactor): refactored changes put into scripts --- .../scripts/interpolate_stop_arrival.py | 2 +- .../scripts/nearest_vp_to_stop.py | 50 ++++--- .../scripts/pipeline_rt_stop_times.py | 17 +-- .../scripts/pipeline_segment_speeds.py | 14 -- .../scripts/pipeline_speedmap.py | 12 -- .../segment_speed_utils/neighbor.py | 131 ++---------------- rt_segment_speeds/setup.py | 2 +- 7 files changed, 42 insertions(+), 186 deletions(-) diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index c8b5a50c5..4031a0c87 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -213,7 +213,7 @@ def interpolate_stop_arrivals( dict_inputs = config_path[segment_type] trip_stop_cols = [*dict_inputs["trip_stop_cols"]] USABLE_VP_FILE = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage2c"] + INPUT_FILE = dict_inputs["stage2"] STOP_ARRIVALS_FILE = dict_inputs["stage3"] start = datetime.datetime.now() diff --git a/rt_segment_speeds/scripts/nearest_vp_to_stop.py b/rt_segment_speeds/scripts/nearest_vp_to_stop.py index 25e59f742..2ea13aae0 100644 --- a/rt_segment_speeds/scripts/nearest_vp_to_stop.py +++ b/rt_segment_speeds/scripts/nearest_vp_to_stop.py @@ -12,7 +12,6 @@ from typing import Literal, Optional from calitp_data_analysis.geography_utils import WGS84 -from calitp_data_analysis import utils from segment_speed_utils import helpers, neighbor from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from segment_speed_utils.project_vars import SEGMENT_TYPES @@ -155,31 +154,40 @@ def nearest_neighbor_for_stop( else: print(f"{segment_type} is not valid") - gdf = neighbor.merge_stop_vp_for_nearest_neighbor( - stop_times, analysis_date) - - results = neighbor.add_nearest_neighbor_result_array(gdf, analysis_date) - - # Keep columns from results that are consistent across segment types - # use trip_stop_cols as a way to uniquely key into a row - keep_cols = trip_stop_cols + [ - "shape_array_key", - "stop_geometry", - "nearest_vp_arr" - ] - utils.geoparquet_gcs_export( - results[keep_cols], - SEGMENT_GCS, - EXPORT_FILE, + gdf = neighbor.merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) + + vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( + neighbor.subset_arrays_to_valid_directions + )( + gdf.vp_primary_direction, + gdf.vp_geometry, + gdf.vp_idx, + gdf.stop_geometry, + gdf.stop_primary_direction, + gdf.shape_geometry, + gdf.stop_meters ) + + gdf2 = gdf.assign( + prior_vp_idx = vp_before, + subseq_vp_idx = vp_after, + prior_vp_meters = vp_before_meters, + subseq_vp_meters = vp_after_meters + )[trip_stop_cols + [ + "shape_array_key", "stop_meters", + "prior_vp_idx", "subseq_vp_idx", + "prior_vp_meters", "subseq_vp_meters"] + ] + + del gdf, stop_times + + gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}.parquet") end = datetime.datetime.now() logger.info(f"nearest neighbor for {segment_type} " - f"{analysis_date}: {end - start}") - - del gdf, stop_times, results - + f"{analysis_date}: {end - start}") + return ''' diff --git a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py index d428933a3..a37199c96 100644 --- a/rt_segment_speeds/scripts/pipeline_rt_stop_times.py +++ b/rt_segment_speeds/scripts/pipeline_rt_stop_times.py @@ -9,7 +9,6 @@ from loguru import logger from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals from update_vars import GTFS_DATA_DICT @@ -36,21 +35,7 @@ ] [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - + logger.remove() diff --git a/rt_segment_speeds/scripts/pipeline_segment_speeds.py b/rt_segment_speeds/scripts/pipeline_segment_speeds.py index df3862fcd..fe8084eba 100644 --- a/rt_segment_speeds/scripts/pipeline_segment_speeds.py +++ b/rt_segment_speeds/scripts/pipeline_segment_speeds.py @@ -9,7 +9,6 @@ from loguru import logger from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals from update_vars import GTFS_DATA_DICT @@ -40,19 +39,6 @@ del delayed_dfs - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] - - del delayed_dfs - logger.remove() LOG_FILE = "../logs/interpolate_stop_arrival.log" diff --git a/rt_segment_speeds/scripts/pipeline_speedmap.py b/rt_segment_speeds/scripts/pipeline_speedmap.py index 545047217..293db2e44 100644 --- a/rt_segment_speeds/scripts/pipeline_speedmap.py +++ b/rt_segment_speeds/scripts/pipeline_speedmap.py @@ -14,7 +14,6 @@ from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from nearest_vp_to_stop import nearest_neighbor_for_stop -from vp_around_stops import filter_to_nearest_two_vp from interpolate_stop_arrival import interpolate_stop_arrivals from stop_arrivals_to_speed import calculate_speed_from_stop_arrivals @@ -88,17 +87,6 @@ def concatenate_speedmap_proxy_arrivals_with_remaining( ] [compute(i)[0] for i in delayed_dfs] - - - delayed_dfs = [ - delayed(filter_to_nearest_two_vp)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] logger.remove() diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index f9c19ea2c..f5d21ce84 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -6,61 +6,21 @@ import pandas as pd import shapely -from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import gtfs_schedule_wrangling, helpers, vp_transform +from segment_speed_utils import helpers, vp_transform from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT, PROJECT_CRS from shared_utils import geo_utils -def add_nearest_vp_idx( - vp_linestring: shapely.LineString, - stop: shapely.Point, - vp_idx_arr: np.ndarray -) -> int: - """ - Index into where the nearest vp is to the stop, - and return that vp_idx value from the vp_idx array. - """ - idx = geo_utils.nearest_snap(vp_linestring, stop, k_neighbors=1) - - return vp_idx_arr[idx] - - def merge_stop_vp_for_nearest_neighbor( stop_times: gpd.GeoDataFrame, analysis_date: str, **kwargs -) -> gpd.GeoDataFrame: - VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line - - vp_condensed = gpd.read_parquet( - f"{SEGMENT_GCS}{VP_NN}_{analysis_date}.parquet", - columns = ["trip_instance_key", - "vp_idx", "vp_primary_direction", - "geometry"], - **kwargs - ).to_crs(WGS84) - - gdf = pd.merge( - stop_times.rename( - columns = {"geometry": "stop_geometry"} - ).set_geometry("stop_geometry").to_crs(WGS84), - vp_condensed.rename( - columns = { - "geometry": "vp_geometry" - }), - on = "trip_instance_key", - how = "inner" - ) - - return gdf - - -def new_merge_stop_vp_for_nearest_neighbor( - stop_times: gpd.GeoDataFrame, - analysis_date: str, - **kwargs ): + """ + Merge stop times file with vp. + vp gdf has been condensed so that all the vp coords + make up coordinates of a linestring. + """ VP_NN = GTFS_DATA_DICT.speeds_tables.vp_condensed_line vp_condensed = gpd.read_parquet( @@ -93,6 +53,9 @@ def new_merge_stop_vp_for_nearest_neighbor( how = "inner" ) + # Calculate stop_meters, which is the stop geometry + # projected onto shape_geometry and is interpreted as + # stop X is Y meters along shape gdf = gdf.assign( stop_meters = gdf.shape_geometry.project(gdf.stop_geometry) ) @@ -100,80 +63,6 @@ def new_merge_stop_vp_for_nearest_neighbor( return gdf -def subset_arrays_to_valid_directions( - vp_direction_array: np.ndarray, - vp_geometry: shapely.LineString, - vp_idx_array: np.ndarray, - stop_geometry: shapely.Point, - stop_direction: str, -) -> np.ndarray: - """ - Each row stores several arrays related to vp. - vp_direction is an array, vp_idx is an array, - and the linestring of vp coords can be coerced into an array. - - When we're doing nearest neighbor search, we want to - first filter the full array down to valid vp - before snapping it. - """ - N_NEAREST_POINTS = 10 - - opposite_direction = vp_transform.OPPOSITE_DIRECTIONS[stop_direction] - - # These are the valid index values where opposite direction - # is excluded - valid_indices = (vp_direction_array != opposite_direction).nonzero() - - vp_coords_line = np.array(vp_geometry.coords)[valid_indices] - - vp_idx_arr = np.asarray(vp_idx_array)[valid_indices] - - np_inds = geo_utils.nearest_snap( - vp_coords_line, stop_geometry, N_NEAREST_POINTS - ) - - # nearest neighbor returns self.N - # if there are no nearest neighbor results found - # if we want 10 nearest neighbors and 8th, 9th, 10th are all - # the same result, the 8th will have a result, then 9th and 10th will - # return the length of the array (which is out-of-bounds) - np_inds2 = np_inds[np_inds < vp_idx_arr.size] - - nearest_vp_arr = vp_idx_arr[np_inds2] - - return nearest_vp_arr - - -def add_nearest_neighbor_result_array( - gdf: gpd.GeoDataFrame, - analysis_date: str, - **kwargs -) -> pd.DataFrame: - """ - Add the nearest k_neighbors result. - """ - nearest_vp_arr_series = [] - - for row in gdf.itertuples(): - - nearest_vp_arr = subset_arrays_to_valid_directions( - getattr(row, "vp_primary_direction"), - getattr(row, "vp_geometry"), - getattr(row, "vp_idx"), - getattr(row, "stop_geometry"), - getattr(row, "stop_primary_direction"), - ) - - nearest_vp_arr_series.append(nearest_vp_arr) - - gdf2 = gdf.assign( - nearest_vp_arr = nearest_vp_arr_series - ).drop(columns = ["vp_primary_direction", "vp_idx", "vp_geometry"]) - - return gdf2 - - - def find_nearest_points( vp_coords_line: np.ndarray, target_stop: shapely.Point, @@ -256,7 +145,7 @@ def filter_to_nearest2_vp( return before_idx, after_idx, before_vp_meters, after_vp_meters -def new_subset_arrays_to_valid_directions( +def subset_arrays_to_valid_directions( vp_direction_array: np.ndarray, vp_geometry: shapely.LineString, vp_idx_array: np.ndarray, diff --git a/rt_segment_speeds/setup.py b/rt_segment_speeds/setup.py index be6555fa4..ee07f9071 100644 --- a/rt_segment_speeds/setup.py +++ b/rt_segment_speeds/setup.py @@ -3,7 +3,7 @@ setup( name="segment_speed_utils", packages=find_packages(), - version="1.6", + version="1.7", description="Utility functions for GTFS RT segment speeds", author="Cal-ITP", license="Apache", From 493712eaec2c461ff508cd86ef93d05f03055cc7 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:09:56 +0000 Subject: [PATCH 08/10] (refactor): stage2, 2b now consolidated as stage2 --- _shared_utils/shared_utils/gtfs_analytics_data.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index 96ffcb1b7..712d31dc1 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -73,8 +73,6 @@ stop_segments: dir: ${gcs_paths.SEGMENT_GCS} stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_shape_segments" - stage2b: "nearest/nearest2_vp_shape_segments" - stage2c: "nearest/wide_nearest2_vp_shape_segments" stage3: "stop_arrivals" stage4: "speeds_stop_segments" trip_stop_cols: ["trip_instance_key", "stop_sequence"] @@ -82,7 +80,7 @@ stop_segments: stop_pair_cols: ["stop_pair", "stop_pair_name"] route_dir_cols: ["route_id", "direction_id"] segment_cols: ["route_id", "direction_id", "stop_pair", "geometry"] - shape_stop_single_segment: "rollup_singleday/speeds_shape_stop_segments" #-- stop after Oct 2024 + #shape_stop_single_segment: "rollup_singleday/speeds_shape_stop_segments" #-- stop after Oct 2024 route_dir_single_segment: "rollup_singleday/speeds_route_dir_segments" route_dir_single_segment_detail: "rollup_singleday/speeds_route_dir_segments_detail" # interim for speedmaps route_dir_multi_segment: "rollup_multiday/speeds_route_dir_segments" @@ -95,7 +93,6 @@ rt_stop_times: dir: ${gcs_paths.SEGMENT_GCS} stage1: ${speeds_tables.vp_dwell} stage2: "nearest/nearest_vp_rt_stop_times" - stage2b: "nearest/nearest2_vp_rt_stop_times" stage3: "rt_stop_times/stop_arrivals" stage4: "rt_stop_times/speeds" trip_stop_cols: ["trip_instance_key", "stop_sequence"] @@ -116,7 +113,6 @@ speedmap_segments: stage1: ${speeds_tables.vp_dwell} proxy_stop_times: "stop_time_expansion/speedmap_stop_times" stage2: "nearest/nearest_vp_speedmap_proxy" - stage2b: "nearest/nearest2_vp_speedmap_proxy" stage3: "speedmap/stop_arrivals_proxy" stage3b: "speedmap/stop_arrivals" stage4: "speedmap/speeds" From 3f04352347083b69de86321d20988add36679507 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 18 Dec 2024 00:11:35 +0000 Subject: [PATCH 09/10] (remove): testing script consolidating nearest neighbor intermediate steps --- .../scripts/interpolate_stop_arrival.py | 1 - .../scripts/new_nearest_vp_and_filter.py | 142 ------------------ 2 files changed, 143 deletions(-) delete mode 100644 rt_segment_speeds/scripts/new_nearest_vp_and_filter.py diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index 4031a0c87..fafb920b8 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -95,7 +95,6 @@ def add_arrival_time( getattr(row, "subseq_location_timestamp_local"), ]) - interpolated_arrival = segment_calcs.interpolate_stop_arrival_time( stop_position, projected_points, timestamp_arr) diff --git a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py b/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py deleted file mode 100644 index 9a9b21fff..000000000 --- a/rt_segment_speeds/scripts/new_nearest_vp_and_filter.py +++ /dev/null @@ -1,142 +0,0 @@ -import datetime -import geopandas as gpd -import numpy as np -import pandas as pd -import shapely -import sys - -from loguru import logger - -from shared_utils import rt_dates -from segment_speed_utils import helpers, neighbor -from update_vars import SEGMENT_GCS, SHARED_GCS, GTFS_DATA_DICT -from segment_speed_utils.project_vars import PROJECT_CRS - - -def stop_times_for_shape_segments( - analysis_date: str, - dict_inputs: dict -) -> gpd.GeoDataFrame: - """ - This is the stop times table using only 1 shape for each - route-direction. Every trip belong to that shape - will be cut along the same stops. - This allows us to aggregate segments across trips because each - segment has the same stop_id1 and stop_id2. - """ - SEGMENT_FILE = dict_inputs["segments_file"] - - rt_trips = helpers.import_unique_vp_trips(analysis_date) - - shape_stop_combinations = pd.read_parquet( - f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet", - columns = ["trip_instance_key", - "stop_id1", "stop_pair", - "st_trip_instance_key"], - filters = [[ - ("trip_instance_key", "in", rt_trips) - ]] - ).rename(columns = {"stop_id1": "stop_id"}) - - subset_trips = shape_stop_combinations.st_trip_instance_key.unique() - - stops_to_use = helpers.import_scheduled_stop_times( - analysis_date, - columns = ["trip_instance_key", "shape_array_key", - "stop_sequence", "stop_id", "stop_pair", - "stop_primary_direction", "geometry"], - filters = [[("trip_instance_key", "in", subset_trips)]], - get_pandas = True, - with_direction = True - ).rename(columns = {"trip_instance_key": "st_trip_instance_key"}) - - stop_times = pd.merge( - stops_to_use, - shape_stop_combinations, - on = ["st_trip_instance_key", "stop_id", "stop_pair"], - how = "inner" - ).drop( - columns = "st_trip_instance_key" - ).drop_duplicates().reset_index(drop=True) - - return stop_times - - -def new_nearest_neighbor_for_stop( - analysis_date: str, - segment_type: str, - config_path = GTFS_DATA_DICT -): - """ - """ - start = datetime.datetime.now() - - dict_inputs = config_path[segment_type] - trip_stop_cols = [*dict_inputs["trip_stop_cols"]] - EXPORT_FILE = dict_inputs["stage2c"] - - stop_times = stop_times_for_shape_segments( - analysis_date, - dict_inputs - ) - - gdf = neighbor.new_merge_stop_vp_for_nearest_neighbor(stop_times, analysis_date) - - vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize( - neighbor.new_subset_arrays_to_valid_directions - )( - gdf.vp_primary_direction, - gdf.vp_geometry, - gdf.vp_idx, - gdf.stop_geometry, - gdf.stop_primary_direction, - gdf.shape_geometry, - gdf.stop_meters - ) - - gdf2 = gdf.assign( - prior_vp_idx = vp_before, - subseq_vp_idx = vp_after, - prior_vp_meters = vp_before_meters, - subseq_vp_meters = vp_after_meters - )[trip_stop_cols + [ - "shape_array_key", "stop_meters", - "prior_vp_idx", "subseq_vp_idx", - "prior_vp_meters", "subseq_vp_meters"] - ] - - del gdf, stop_times - - gdf2.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet") - - end = datetime.datetime.now() - logger.info(f"nearest neighbor for {segment_type} " - f"{analysis_date}: {end - start}") - - return - - - -if __name__ == "__main__": - - #from segment_speed_utils.project_vars import analysis_date_list - - from dask import delayed, compute - LOG_FILE = "../logs/test.log" - logger.add(LOG_FILE, retention="3 months") - logger.add(sys.stderr, - format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", - level="INFO") - - analysis_date_list = [rt_dates.DATES["sep2024"]] - segment_type = "stop_segments" - - delayed_dfs = [ - delayed(new_nearest_neighbor_for_stop)( - analysis_date = analysis_date, - segment_type = segment_type, - config_path = GTFS_DATA_DICT - ) for analysis_date in analysis_date_list - ] - - [compute(i)[0] for i in delayed_dfs] From 406035a47dffa825c559620bb6cba43569d4d43c Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 13 Dec 2024 18:51:00 +0000 Subject: [PATCH 10/10] (open_data): update readme to include 2 speed layers --- open_data/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/open_data/README.md b/open_data/README.md index ecb672b5d..48e73d5f6 100644 --- a/open_data/README.md +++ b/open_data/README.md @@ -5,6 +5,8 @@ 1. [HQTA Stops](https://gis.data.ca.gov/datasets/f6c30480f0e84be699383192c099a6a4_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_HQ_Transit_Stops/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_HQ_Transit_Stops/MapServer) 1. [CA Transit Routes](https://gis.data.ca.gov/datasets/dd7cb74665a14859a59b8c31d3bc5a3e_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Routes/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Routes/MapServer) 1. [CA Transit Stops](https://gis.data.ca.gov/datasets/900992cc94ab49dbbb906d8f147c2a72_0): metadata [feature server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Stops/FeatureServer) or [map server](https://gisdata.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Stops/MapServer) +1. [CA Average Transit Speeds by Stop-to-Stop Segments](https://gis.data.ca.gov/datasets/4937eeb59fdb4e56ae75e64688c7f2c0_0/): metadata [feature server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Stop_Segments/FeatureServer/0) or [map server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Stop_Segments/MapServer/0) +1. [CA Average Transit Speeds by Route and Time of Day](https://gis.data.ca.gov/datasets/071df783099f4224b7ebb54839eae007_0/): metadata [feature server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Route_Time_of_Day/FeatureServer/0) or [map server](https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/Speeds_by_Route_Time_of_Day/MapServer/0) 1. All GTFS datasets [metadata/data dictionary](https://data.ca.gov/dataset/cal-itp-gtfs-ingest-pipeline-dataset/resource/e26bf6ee-419d-4a95-8e4c-e2b13d5de793) ## GTFS Schedule Routes & Stops Geospatial Data