diff --git a/_shared_utils/setup.py b/_shared_utils/setup.py
index e88635a02..4ca979af3 100644
--- a/_shared_utils/setup.py
+++ b/_shared_utils/setup.py
@@ -4,7 +4,7 @@
 setup(
     name="shared_utils",
     packages=find_packages(),
-    version="2.8",
+    version="3.0",
     description="Shared utility functions for data analyses",
     author="Cal-ITP",
     license="Apache",
diff --git a/_shared_utils/shared_utils/__init__.py b/_shared_utils/shared_utils/__init__.py
index c95975451..a0de7ba01 100644
--- a/_shared_utils/shared_utils/__init__.py
+++ b/_shared_utils/shared_utils/__init__.py
@@ -2,22 +2,26 @@
     arcgis_query,
     catalog_utils,
     dask_utils,
+    geo_utils,
     gtfs_utils_v2,
     portfolio_utils,
     publish_utils,
     rt_dates,
     rt_utils,
     schedule_rt_utils,
+    time_helpers,
 )

 __all__ = [
     "arcgis_query",
     "catalog_utils",
     "dask_utils",
+    "geo_utils",
     "gtfs_utils_v2",
     "portfolio_utils",
     "publish_utils",
     "rt_dates",
     "rt_utils",
     "schedule_rt_utils",
+    "time_helpers",
 ]
diff --git a/_shared_utils/shared_utils/geo_utils.py b/_shared_utils/shared_utils/geo_utils.py
new file mode 100644
index 000000000..c030f1fd7
--- /dev/null
+++ b/_shared_utils/shared_utils/geo_utils.py
@@ -0,0 +1,177 @@
+"""
+Geospatial utility functions
+"""
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import shapely
+from calitp_data_analysis import geography_utils
+from scipy.spatial import KDTree
+from shared_utils import rt_utils
+
+# Could we use distance to filter for nearest neighbor?
+# It can make the length of results more unpredictable...maybe we stick to
+# k_neighbors and keep the nearest k, so that we can at least be
+# more consistent with the arrays returned
+geo_const_meters = 6_371_000 * np.pi / 180
+geo_const_miles = 3_959_000 * np.pi / 180
+
+
+def nearest_snap(line: shapely.LineString, point: shapely.Point, k_neighbors: int = 1) -> np.ndarray:
+    """
+    Based off of this function,
+    but we want to return the index value, rather than the point.
+    https://github.com/UTEL-UIUC/gtfs_segments/blob/main/gtfs_segments/geom_utils.py
+    """
+    line = np.asarray(line.coords)
+    point = np.asarray(point.coords)
+    tree = KDTree(line)
+
+    # np_dist is the array of distances of the result (we don't return it)
+    # np_inds is the array of indices of the result
+    _, np_inds = tree.query(
+        point,
+        workers=-1,
+        k=k_neighbors,
+    )
+
+    return np_inds.squeeze()
+
+
+def vp_as_gdf(vp: pd.DataFrame, crs: str = "EPSG:3310") -> gpd.GeoDataFrame:
+    """
+    Turn vp into a gdf and project it to the given CRS (default EPSG:3310).
+    """
+    vp_gdf = (
+        geography_utils.create_point_geometry(vp, longitude_col="x", latitude_col="y", crs=geography_utils.WGS84)
+        .to_crs(crs)
+        .drop(columns=["x", "y"])
+    )
+
+    return vp_gdf
+
+
+def add_arrowized_geometry(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+    """
+    Add a column where the segment is arrowized.
+    """
+    segment_geom = gpd.GeoSeries(gdf.geometry)
+    CRS = gdf.crs.to_epsg()
+
+    # TODO: parallel_offset is deprecated in shapely 2.0; offset_curve is the replacement
+    geom_parallel = gpd.GeoSeries([rt_utils.try_parallel(i) for i in segment_geom], crs=CRS)
+    # geom_parallel = gpd.GeoSeries(
+    #     [i.offset_curve(30) for i in segment_geom],
+    #     crs=CRS
+    # )
+
+    geom_arrowized = rt_utils.arrowize_segment(geom_parallel, buffer_distance=20)
+
+    gdf = gdf.assign(geometry_arrowized=geom_arrowized)
+
+    return gdf
+
+
+def get_direction_vector(start: shapely.geometry.Point, end: shapely.geometry.Point) -> tuple:
+    """
+    Given 2 points (in a projected CRS...not WGS84), return a
+    tuple that shows (delta_x, delta_y).
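+    Ex (hypothetical values): start=Point(2, 1), end=Point(5, 5) -> (3.0, 4.0).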
+
+    https://www.varsitytutors.com/precalculus-help/find-a-direction-vector-when-given-two-points
+    https://stackoverflow.com/questions/17332759/finding-vectors-with-2-points
+
+    """
+    return ((end.x - start.x), (end.y - start.y))
+
+
+def distill_array_into_direction_vector(array: np.ndarray) -> tuple:
+    """
+    Given an array of n items, take its start and end.
+    From start/end, we can turn 2 coordinate points into 1 direction vector.
+    The direction vector is a tuple that equals (delta_x, delta_y).
+    """
+    origin = array[0]
+    destination = array[-1]
+    return get_direction_vector(origin, destination)
+
+
+def get_vector_norm(vector: tuple) -> float:
+    """
+    Get the length of a vector (Pythagorean Theorem): sum the
+    squares of the components, then take the square root.
+
+    Dividing a vector by this length yields the unit/normalized
+    vector, so this function supplies the denominator.
+    """
+    return np.sqrt(vector[0] ** 2 + vector[1] ** 2)
+
+
+def get_normalized_vector(vector: tuple) -> tuple:
+    """
+    Normalize a vector of distances to a unit vector.
+    https://stackoverflow.com/questions/21030391/how-to-normalize-a-numpy-array-to-a-unit-vector
+    """
+    x_norm = vector[0] / get_vector_norm(vector)
+    y_norm = vector[1] / get_vector_norm(vector)
+
+    return (x_norm, y_norm)
+
+
+def dot_product(vec1: tuple, vec2: tuple) -> float:
+    """
+    Take the dot product: multiply the x components, multiply the
+    y components, and sum them.
+    """
+    return vec1[0] * vec2[0] + vec1[1] * vec2[1]
+
+
+def segmentize_by_indices(line_geometry: shapely.LineString, start_idx: int, end_idx: int) -> shapely.LineString:
+    """
+    Cut a line according to index values.
+    Similar to shapely.ops.substring, which cuts a line according to distances.
+    Here, we don't have specified distances, but we want to customize
+    where to segment the line.
+    """
+    all_coords = shapely.get_coordinates(line_geometry)
+
+    if end_idx + 1 > all_coords.size:
+        subset_coords = all_coords[start_idx:end_idx]
+    else:
+        subset_coords = all_coords[start_idx : end_idx + 1]
+
+    if len(subset_coords) < 2:
+        return shapely.LineString()
+    else:
+        return shapely.LineString([shapely.Point(i) for i in subset_coords])
+
+
+def draw_line_between_points(gdf: gpd.GeoDataFrame, group_cols: list) -> gpd.GeoDataFrame:
+    """
+    Use the current postmile as the
+    starting geometry / segment beginning
+    and the subsequent postmile (based on odometer)
+    as the ending geometry / segment end.
+
+    Segment goes from current to next postmile.
+    """
+    # Grab the subsequent point geometry
+    # We can drop whenever the last point is missing within
+    # a group. If we have 3 points, we can draw 2 lines.
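+    # e.g. postmile points P0, P1, P2 (sorted) yield segments P0->P1 and P1->P2;
+    # P2 has no subsequent point, so its row is dropped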
+    gdf = gdf.assign(end_geometry=(gdf.groupby(group_cols, group_keys=False).geometry.shift(-1))).dropna(
+        subset="end_geometry"
+    )
+
+    # Construct linestring with 2 point coordinates
+    gdf = (
+        gdf.assign(
+            line_geometry=gdf.apply(lambda x: shapely.LineString([x.geometry, x.end_geometry]), axis=1).set_crs(
+                geography_utils.WGS84
+            )
+        )
+        .drop(columns=["geometry", "end_geometry"])
+        .rename(columns={"line_geometry": "geometry"})
+    )
+
+    return gdf
diff --git a/_shared_utils/shared_utils/schedule_rt_utils.py b/_shared_utils/shared_utils/schedule_rt_utils.py
index be78dae39..7b3c3bf7b 100644
--- a/_shared_utils/shared_utils/schedule_rt_utils.py
+++ b/_shared_utils/shared_utils/schedule_rt_utils.py
@@ -13,6 +13,14 @@
 from siuba import *

 PACIFIC_TIMEZONE = "US/Pacific"

+RENAME_DISTRICT_DICT = {
+    "Marysville / Sacramento": "Marysville",  # D3
+    "Bay Area / Oakland": "Oakland",  # D4
+    "San Luis Obispo / Santa Barbara": "San Luis Obispo",  # D5
+    "Fresno / Bakersfield": "Fresno",  # D6
+    "San Bernardino / Riverside": "San Bernardino",  # D8
+    "Orange County": "Irvine",  # D12
+}

 def localize_timestamp_col(df: dd.DataFrame, timestamp_col: Union[str, list]) -> dd.DataFrame:
@@ -84,7 +92,10 @@ def filter_dim_gtfs_datasets(
     custom_filtering: dict = None,
     get_df: bool = True,
 ) -> Union[pd.DataFrame, siuba.sql.verbs.LazyTbl]:
-    """ """
+    """
+    Filter the mart_transit_database.dim_gtfs_dataset table
+    and keep only the valid rows that passed data quality checks.
+    """
     if "key" not in keep_cols:
         raise KeyError("Include key in keep_cols list")
@@ -164,9 +175,73 @@ def get_organization_id(
     return df2
+
+
+def filter_dim_county_geography(
+    date: str,
+    keep_cols: list[str] = ["caltrans_district"],
+) -> pd.DataFrame:
+    """
+    Merge mart_transit_database.dim_county_geography with
+    mart_transit_database.bridge_organizations_x_headquarters_county_geography.
+    Both tables are at organization-county-feed_period grain.
+
+    dim_county_geography holds additional geography columns like
+    MSA, FIPS, etc.
+
+    Use this merge to get caltrans_district.
+    Organizations belong to counties, and counties are assigned to districts.
+    """
+    bridge_orgs_county_geog = (
+        tbls.mart_transit_database.bridge_organizations_x_headquarters_county_geography()
+        >> gtfs_utils_v2.subset_cols([_.organization_name, _.county_geography_key, _._valid_from, _._valid_to])
+        >> collect()
+    )
+
+    keep_cols2 = list(set(keep_cols + ["county_geography_key", "caltrans_district_name"]))
+
+    dim_county_geography = (
+        tbls.mart_transit_database.dim_county_geography()
+        >> rename(county_geography_key=_.key)
+        >> gtfs_utils_v2.subset_cols(keep_cols2)
+        >> collect()
+    )
+
+    # Several caltrans_district values in mart_transit_database
+    # now contain slashes.
+    # Use a dict to standardize these to match how they appeared in previous versions
+    dim_county_geography = dim_county_geography.assign(
+        caltrans_district_name=dim_county_geography.apply(
+            lambda x: RENAME_DISTRICT_DICT[x.caltrans_district_name]
+            if x.caltrans_district_name in RENAME_DISTRICT_DICT.keys()
+            else x.caltrans_district_name,
+            axis=1,
+        )
+    )
+
+    bridge_orgs_county_geog = localize_timestamp_col(bridge_orgs_county_geog, ["_valid_from", "_valid_to"])
+
+    bridge_orgs_county_geog2 = bridge_orgs_county_geog >> filter(
+        _._valid_from_local <= pd.to_datetime(date), _._valid_to_local >= pd.to_datetime(date)
+    )
+
+    # Merge organization-county with caltrans_district info.
+    # It appears to be a 1:1 merge: we checked whether an organization
+    # can belong to multiple districts, and that doesn't appear to happen.
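+    # e.g. an org headquartered in Sacramento County joins exactly one
+    # county row, which later renders as caltrans_district "03 - Marysville"
+    # (illustrative values)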
+    df = pd.merge(bridge_orgs_county_geog2, dim_county_geography, on="county_geography_key", how="inner")
+
+    df2 = (
+        df.assign(caltrans_district=df.caltrans_district.astype(str).str.zfill(2) + " - " + df.caltrans_district_name)[
+            ["organization_name"] + keep_cols
+        ]
+        .drop_duplicates()
+        .reset_index(drop=True)
+    )
+
+    return df2
+
+
 def filter_dim_organizations(
     date: str,
-    keep_cols: list[str] = ["source_record_id", "caltrans_district"],
+    keep_cols: list[str] = ["source_record_id"],
     custom_filtering: dict = None,
     get_df: bool = True,
 ) -> Union[pd.DataFrame, siuba.sql.verbs.LazyTbl]:
@@ -201,7 +276,8 @@ def sample_gtfs_dataset_key_to_organization_crosswalk(
         "base64_url",
         "uri",
     ],
-    dim_organization_cols: list[str] = ["source_record_id", "name", "caltrans_district"],
+    dim_organization_cols: list[str] = ["source_record_id", "name"],
+    dim_county_geography_cols: list[str] = ["caltrans_district"],
 ) -> pd.DataFrame:
     """
     Get crosswalk from gtfs_dataset_key to certain quartet data identifiers
@@ -243,11 +319,17 @@ def sample_gtfs_dataset_key_to_organization_crosswalk(

     feeds_with_org_id = get_organization_id(feeds_with_quartet_info, date, merge_cols=merge_cols)

-    # (4) Merge in dim_orgs to get caltrans_district
+    # (4) Merge in dim_orgs to get organization info - everything except caltrans_district is found here
     ORG_RENAME_DICT = {"source_record_id": "organization_source_record_id", "name": "organization_name"}

     orgs = filter_dim_organizations(date, keep_cols=dim_organization_cols, get_df=True).rename(columns=ORG_RENAME_DICT)

-    feeds_with_district = pd.merge(feeds_with_org_id, orgs, on="organization_source_record_id")
+    feeds_with_org_info = pd.merge(feeds_with_org_id, orgs, on="organization_source_record_id")
+
+    # (5) Merge in dim_county_geography to get caltrans_district
+    # https://github.com/cal-itp/data-analyses/issues/1282
+    district = filter_dim_county_geography(date, dim_county_geography_cols)
+
+    feeds_with_district = pd.merge(feeds_with_org_info, district, on="organization_name")

     return feeds_with_district
diff --git a/_shared_utils/shared_utils/shared_data.py b/_shared_utils/shared_utils/shared_data.py
index aa0bceb03..f5a12e781 100644
--- a/_shared_utils/shared_utils/shared_data.py
+++ b/_shared_utils/shared_utils/shared_data.py
@@ -2,11 +2,11 @@
 One-off functions, run once, save datasets for shared use.
""" import geopandas as gpd +import numpy as np import pandas as pd -import shapely from calitp_data_analysis import geography_utils, utils from calitp_data_analysis.sql import to_snakecase -from shared_utils.arcgis_query import query_arcgis_feature_server +from shared_utils import arcgis_query, geo_utils GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/shared_data/" COMPILED_CACHED_GCS = "gs://calitp-analytics-data/data-analyses/rt_delay/compiled_cached_views/" @@ -67,6 +67,11 @@ def make_clean_state_highway_network(): gdf = gpd.read_file(URL) + # Save a raw, undissolved version + utils.geoparquet_gcs_export( + gdf.drop(columns=["Shape_Length", "OBJECTID"]).pipe(to_snakecase), GCS_FILE_PATH, "state_highway_network_raw" + ) + keep_cols = ["Route", "County", "District", "RouteType", "Direction", "geometry"] gdf = gdf[keep_cols] @@ -90,7 +95,7 @@ def export_shn_postmiles(): """ URL = "https://caltrans-gis.dot.ca.gov/arcgis/rest/services/" "CHhighway/SHN_Postmiles_Tenth/" "FeatureServer/0/" - gdf = query_arcgis_feature_server(URL) + gdf = arcgis_query.query_arcgis_feature_server(URL) gdf2 = to_snakecase(gdf).drop(columns="objectid") @@ -99,7 +104,7 @@ def export_shn_postmiles(): return -def draw_line_between_points(gdf: gpd.GeoDataFrame, group_cols: list) -> gpd.GeoDataFrame: +def segment_highway_lines_by_postmile(gdf: gpd.GeoDataFrame, group_cols: list) -> gpd.GeoDataFrame: """ Use the current postmile as the starting geometry / segment beginning @@ -108,53 +113,75 @@ def draw_line_between_points(gdf: gpd.GeoDataFrame, group_cols: list) -> gpd.Geo Segment goes from current to next postmile. """ - # Grab the subsequent point geometry - # We can drop whenever the last point is missing within - # a group. If we have 3 points, we can draw 2 lines. 
-    gdf = gdf.assign(end_geometry=(gdf.groupby(group_cols, group_keys=False).geometry.shift(-1))).dropna(
-        subset="end_geometry"
-    )
+    # For this postmile, snap it to the highway line and find the nearest index.
+    # For a linestring with 10 points, an index value of 2 means it's the 3rd coordinate
+    nearest_idx_series = np.vectorize(geo_utils.nearest_snap)(gdf.line_geometry, gdf.geometry, 1)

-    # Construct linestring with 2 point coordinates
-    gdf = (
-        gdf.assign(
-            line_geometry=gdf.apply(lambda x: shapely.LineString([x.geometry, x.end_geometry]), axis=1).set_crs(
-                geography_utils.WGS84
-            )
-        )
-        .drop(columns=["geometry", "end_geometry"])
-        .rename(columns={"line_geometry": "geometry"})
-    )
+    gdf["idx"] = nearest_idx_series
+
+    # The segment will run from the index of the nearest point for this postmile
+    # until the index of the subsequent postmile.
+    # Ex: idx=1 and subseq_idx=5 means we want to grab hwy_coords[1:6] as our segment
+    gdf = gdf.assign(
+        subseq_idx=(gdf.sort_values(group_cols + ["odometer"]).groupby(group_cols).idx.shift(-1).astype("Int64")),
+        eodometer=(gdf.sort_values(group_cols + ["odometer"]).groupby(group_cols).odometer.shift(-1)),
+    ).rename(columns={"odometer": "bodometer"})
+    # follow the convention of b for begin odometer and e for end odometer
+
+    # Drop NaNs because for 3 points, we can draw 2 segments
+    gdf2 = gdf.dropna(subset="subseq_idx").reset_index(drop=True)
+
+    segment_geom = np.vectorize(geo_utils.segmentize_by_indices)(gdf2.line_geometry, gdf2.idx, gdf2.subseq_idx)
+
+    gdf3 = gdf2.assign(
+        geometry=gpd.GeoSeries(segment_geom).set_crs(geography_utils.WGS84),
+    ).drop(columns=["line_geometry", "idx", "subseq_idx"])

-    return gdf
+    return gdf3


-def create_postmile_segments(group_cols: list) -> gpd.GeoDataFrame:
+def create_postmile_segments(
+    group_cols: list = ["county", "routetype", "route", "direction", "routes", "pmrouteid"]
+) -> gpd.GeoDataFrame:
     """
     Take the SHN postmiles gdf, group by highway / odometer
     and convert the points into lines.
     We'll lose the last postmile for each highway-direction.
     Segment goes from current postmile point to subseq postmile point.
     """
-    gdf = gpd.read_parquet(
-        f"{GCS_FILE_PATH}state_highway_network_postmiles.parquet",
-        columns=["route", "direction", "odometer", "geometry"],
+    # We need multilinestrings to become linestrings (use gdf.explode),
+    # and the columns we select uniquely tag lines (each multilinestring is 1 item)
+    hwy_lines = gpd.read_parquet(
+        f"{GCS_FILE_PATH}state_highway_network_raw.parquet",
+        columns=group_cols + ["bodometer", "eodometer", "geometry"],
+    ).explode("geometry")
+
+    hwy_postmiles = gpd.read_parquet(
+        f"{GCS_FILE_PATH}state_highway_network_postmiles.parquet", columns=group_cols + ["odometer", "geometry"]
     )

-    # If there are duplicates with highway-direction and odometer
-    # (where pm or other column differs slightly),
-    # we'll drop and cut as long of a segment we can
-    # There may be differences in postmile (relative to county start)
-    # and odometer (relative to line's origin).
-    gdf2 = (
-        gdf.sort_values(group_cols + ["odometer"])
-        .drop_duplicates(subset=group_cols + ["odometer"])
+    # Merge hwy points with the lines we want to cut segments from
+    gdf = (
+        pd.merge(hwy_postmiles, hwy_lines.rename(columns={"geometry": "line_geometry"}), on=group_cols, how="inner")
+        .query(
+            # make sure that the postmile point falls between
+            # the beginning and ending odometer;
+            # once we check this, we don't need b/e odometer
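+            # e.g. a postmile at odometer 12.4 matches only the exploded line
+            # piece spanning bodometer 10.0 to eodometer 15.2 (illustrative values)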
+ "odometer >= bodometer & odometer <= eodometer" + ) + .sort_values(group_cols + ["odometer"]) .reset_index(drop=True) + .drop(columns=["bodometer", "eodometer"]) ) - gdf3 = draw_line_between_points(gdf2, group_cols) + gdf2 = segment_highway_lines_by_postmile(gdf, group_cols) + + # TODO: there are rows with empty geometry because their indexed value is the same for current and subseq + # so no line was drawn + # check if it's ok for these to exist + # gdf2[gdf2.geometry.is_empty] shows about 57k rows that didn't get cut - utils.geoparquet_gcs_export(gdf3, GCS_FILE_PATH, "state_highway_network_postmile_segments") + utils.geoparquet_gcs_export(gdf2, GCS_FILE_PATH, "state_highway_network_postmile_segments") return @@ -243,7 +270,7 @@ def make_transit_operators_to_legislative_district_crosswalk(date_list: list) -> # State Highway Network make_clean_state_highway_network() export_shn_postmiles() - create_postmile_segments(["route", "direction"]) + create_postmile_segments(["district", "county", "routetype", "route", "direction", "routes", "pmrouteid"]) # Legislative Districts export_combined_legislative_districts() diff --git a/_shared_utils/shared_utils/time_helpers.py b/_shared_utils/shared_utils/time_helpers.py new file mode 100644 index 000000000..29b3d0c1e --- /dev/null +++ b/_shared_utils/shared_utils/time_helpers.py @@ -0,0 +1,78 @@ +""" +Helpers for defining peak vs offpeak periods and +weekend and weekends so we can aggregate our +existing time-of-day bins. +""" +import datetime + +import pandas as pd + +PEAK_PERIODS = ["AM Peak", "PM Peak"] + +HOURS_BY_TIME_OF_DAY = { + "Owl": 4, # [0, 3] + "Early AM": 3, # [4, 6] + "AM Peak": 3, # [7, 9] + "Midday": 5, # [10, 14] + "PM Peak": 5, # [15, 19] + "Evening": 4, # [20, 23] +} + +TIME_OF_DAY_DICT = { + **{k: "peak" for k, v in HOURS_BY_TIME_OF_DAY.items() if k in PEAK_PERIODS}, + **{k: "offpeak" for k, v in HOURS_BY_TIME_OF_DAY.items() if k not in PEAK_PERIODS}, +} + +DAY_TYPE_DICT = { + 1: "Sunday", + 2: "Monday", + 3: "Tuesday", + 4: "Wednesday", + 5: "Thursday", + 6: "Friday", + 7: "Saturday", +} + +WEEKDAY_DICT = { + **{k: "weekday" for k in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]}, + **{k: "weekend" for k in ["Saturday", "Sunday"]}, +} + + +def time_span_labeling(date_list: list) -> tuple[str]: + """ + If we grab a week's worth of trips, we'll + use this week's average to stand-in for the entire month. + Label with month and year. + """ + time_span_str = list(set([datetime.datetime.strptime(d, "%Y-%m-%d").strftime("%b%Y").lower() for d in date_list])) + + time_span_num = list(set([datetime.datetime.strptime(d, "%Y-%m-%d").strftime("%m_%Y").lower() for d in date_list])) + + if len(time_span_str) == 1: + return time_span_str[0], time_span_num[0] + + else: + print(f"multiple months: {time_span_str}") + return time_span_str, time_span_num + + +def add_time_span_columns(df: pd.DataFrame, time_span_num: str) -> pd.DataFrame: + """ + Add columns for month / year, use when we have aggregated time-series. 
+ """ + month = int(time_span_num.split("_")[0]) + year = int(time_span_num.split("_")[1]) + + # Downgrade some dtypes for public bucket + df = df.assign( + month=month, + year=year, + ).astype( + { + "month": "int16", + "year": "int16", + } + ) + + return df diff --git a/gtfs_funnel/concatenate_monthly_scheduled_service.py b/gtfs_funnel/concatenate_monthly_scheduled_service.py index da2a9f615..b736c0e49 100644 --- a/gtfs_funnel/concatenate_monthly_scheduled_service.py +++ b/gtfs_funnel/concatenate_monthly_scheduled_service.py @@ -6,8 +6,8 @@ schedule_gtfs_dataset_key. """ import pandas as pd -from segment_speed_utils import helpers, time_helpers, time_series_utils -from shared_utils import rt_dates +from segment_speed_utils import helpers, time_series_utils +from shared_utils import rt_dates, time_helpers from update_vars import GTFS_DATA_DICT, SCHED_GCS def parse_service_date(df: pd.DataFrame) -> pd.DataFrame: diff --git a/gtfs_funnel/stop_arrivals_in_roads.py b/gtfs_funnel/stop_arrivals_in_roads.py index 488435c00..6d623d57b 100644 --- a/gtfs_funnel/stop_arrivals_in_roads.py +++ b/gtfs_funnel/stop_arrivals_in_roads.py @@ -8,10 +8,9 @@ from segment_speed_utils import (helpers, gtfs_schedule_wrangling, - time_helpers ) from segment_speed_utils.project_vars import PROJECT_CRS -from shared_utils import rt_dates +from shared_utils import rt_dates, time_helpers from update_vars import SHARED_GCS, SCHED_GCS road_cols = ["linearid", "mtfcc", "fullname"] diff --git a/gtfs_funnel/stop_times_with_direction.py b/gtfs_funnel/stop_times_with_direction.py index 4e79e5604..284651297 100644 --- a/gtfs_funnel/stop_times_with_direction.py +++ b/gtfs_funnel/stop_times_with_direction.py @@ -2,7 +2,6 @@ Create a schedule stop_times table with direction of travel between stops. 
""" -import dask.dataframe as dd import datetime import geopandas as gpd import numpy as np @@ -10,7 +9,7 @@ from calitp_data_analysis import utils from shared_utils import rt_utils -from segment_speed_utils import helpers, wrangle_shapes +from segment_speed_utils import helpers from segment_speed_utils.project_vars import PROJECT_CRS from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS diff --git a/gtfs_funnel/vp_condenser.py b/gtfs_funnel/vp_condenser.py index 10ccb0221..2ec815b24 100644 --- a/gtfs_funnel/vp_condenser.py +++ b/gtfs_funnel/vp_condenser.py @@ -11,7 +11,8 @@ from calitp_data_analysis.geography_utils import WGS84 from calitp_data_analysis import utils -from segment_speed_utils import vp_transform, wrangle_shapes +from segment_speed_utils import vp_transform +from shared_utils import geo_utils from update_vars import GTFS_DATA_DICT, SEGMENT_GCS def condense_vp_to_linestring( @@ -34,7 +35,7 @@ def condense_vp_to_linestring( "location_timestamp_local", "moving_timestamp_local", ], - ).pipe(wrangle_shapes.vp_as_gdf, crs = WGS84) + ).pipe(geo_utils.vp_as_gdf, crs = WGS84) vp_condensed = delayed(vp_transform.condense_point_geom_to_line)( vp, @@ -80,7 +81,7 @@ def prepare_vp_for_all_directions( dfs = [ delayed(vp_transform.combine_valid_vp_for_direction)( vp, direction) - for direction in wrangle_shapes.ALL_DIRECTIONS + for direction in vp_transform.ALL_DIRECTIONS ] results = [compute(i)[0] for i in dfs] diff --git a/gtfs_funnel/vp_direction.py b/gtfs_funnel/vp_direction.py index 87986fef1..21bf97145 100644 --- a/gtfs_funnel/vp_direction.py +++ b/gtfs_funnel/vp_direction.py @@ -18,7 +18,7 @@ from loguru import logger from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import segment_calcs, wrangle_shapes +from segment_speed_utils import segment_calcs from segment_speed_utils.project_vars import PROJECT_CRS from shared_utils import publish_utils, rt_utils from update_vars import GTFS_DATA_DICT, SEGMENT_GCS diff --git a/open_data/data_dictionary.yml b/open_data/data_dictionary.yml index b8ff9e42b..e3e009814 100644 --- a/open_data/data_dictionary.yml +++ b/open_data/data_dictionary.yml @@ -60,7 +60,7 @@ common-fields: offpeak - offpeak hours are 12am-6:59am inclusive, 10am-2:59pm inclusive, and 8pm-11:59pm inclusive. peak - peak hours are 7am-9:59am inclusive and 3pm-7:59pm inclusive. all_day - definition_source: "https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/segment_speed_utils/time_helpers.py" + definition_source: "https://github.com/cal-itp/data-analyses/blob/main/_shared_utils/shared_utils/time_helpers.py" - &hqta_type definition: |- Indicates the type of High Quality Transit Area as defined in the Public Resources Code that a stop is and/or falls within. Valid options are: hq_corridor_bus (PRC 21155), major_stop_bus (PRC 21064.3), major_stop_rail (PRC 21064.3), major_stop_ferry (PRC 21064.3), major_stop_brt (PRC 21064.3, 21060.2) diff --git a/open_data/xml/speeds_by_route_time_of_day_fgdc.xml b/open_data/xml/speeds_by_route_time_of_day_fgdc.xml index a50f6fc57..9adee1a3a 100644 --- a/open_data/xml/speeds_by_route_time_of_day_fgdc.xml +++ b/open_data/xml/speeds_by_route_time_of_day_fgdc.xml @@ -96,7 +96,7 @@ offpeak - offpeak hours are 12am-6:59am inclusive, 10am-2:59pm inclusive, and 8pm-11:59pm inclusive. peak - peak hours are 7am-9:59am inclusive and 3pm-7:59pm inclusive. 
all_day - https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/segment_speed_utils/time_helpers.py + https://github.com/cal-itp/data-analyses/blob/main/_shared_utils/shared_utils/time_helpers.py speed_mph diff --git a/open_data/xml/speeds_by_stop_segments_fgdc.xml b/open_data/xml/speeds_by_stop_segments_fgdc.xml index 6cf551592..4f2d5f246 100644 --- a/open_data/xml/speeds_by_stop_segments_fgdc.xml +++ b/open_data/xml/speeds_by_stop_segments_fgdc.xml @@ -115,7 +115,7 @@ offpeak - offpeak hours are 12am-6:59am inclusive, 10am-2:59pm inclusive, and 8pm-11:59pm inclusive. peak - peak hours are 7am-9:59am inclusive and 3pm-7:59pm inclusive. all_day - https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/segment_speed_utils/time_helpers.py + https://github.com/cal-itp/data-analyses/blob/main/_shared_utils/shared_utils/time_helpers.py base64_url diff --git a/rt_segment_speeds/scripts/average_segment_speeds.py b/rt_segment_speeds/scripts/average_segment_speeds.py index c7b0b6368..ad9e68170 100644 --- a/rt_segment_speeds/scripts/average_segment_speeds.py +++ b/rt_segment_speeds/scripts/average_segment_speeds.py @@ -16,12 +16,13 @@ helpers, metrics, segment_calcs, - time_helpers, time_series_utils ) +from shared_utils import time_helpers from update_vars import GTFS_DATA_DICT, SEGMENT_GCS from segment_speed_utils.project_vars import SEGMENT_TYPES + OPERATOR_COLS = [ "schedule_gtfs_dataset_key", ] diff --git a/rt_segment_speeds/scripts/average_summary_speeds.py b/rt_segment_speeds/scripts/average_summary_speeds.py index 7ddb66a7b..d3c3ad25e 100644 --- a/rt_segment_speeds/scripts/average_summary_speeds.py +++ b/rt_segment_speeds/scripts/average_summary_speeds.py @@ -14,8 +14,8 @@ from calitp_data_analysis import utils from segment_speed_utils import (gtfs_schedule_wrangling, metrics, - time_helpers, ) +from shared_utils import time_helpers from segment_speed_utils.project_vars import SEGMENT_TYPES from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from average_segment_speeds import (OPERATOR_COLS, CROSSWALK_COLS, diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index 80fa10261..a155b74c1 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -12,7 +12,7 @@ from typing import Literal, Optional from segment_speed_utils import (array_utils, helpers, - segment_calcs, wrangle_shapes) + segment_calcs) from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from segment_speed_utils.project_vars import PROJECT_CRS, SEGMENT_TYPES from shared_utils import rt_dates @@ -138,7 +138,7 @@ def add_arrival_time( ]) - interpolated_arrival = wrangle_shapes.interpolate_stop_arrival_time( + interpolated_arrival = segment_calcs.interpolate_stop_arrival_time( stop_position, projected_points, timestamp_arr) arrival_time_series.append(interpolated_arrival) @@ -187,7 +187,7 @@ def stop_and_arrival_time_arrays_by_trip( # Use correct values to fill in the missing arrival times df2 = df2.assign( arrival_time = df2.apply( - lambda x: wrangle_shapes.interpolate_stop_arrival_time( + lambda x: segment_calcs.interpolate_stop_arrival_time( x.stop_meters, x.stop_meters_arr, x.arrival_time_arr ), axis=1 ) diff --git a/rt_segment_speeds/scripts/nearest_vp_to_road.py b/rt_segment_speeds/scripts/nearest_vp_to_road.py index 4d5952d78..d92798aaf 100644 --- a/rt_segment_speeds/scripts/nearest_vp_to_road.py +++ b/rt_segment_speeds/scripts/nearest_vp_to_road.py @@ -7,7 
+7,7 @@ from dask import delayed, compute -from segment_speed_utils import helpers, neighbor, segment_calcs, wrangle_shapes +from segment_speed_utils import helpers, neighbor, segment_calcs from segment_speed_utils.project_vars import SEGMENT_GCS, SHARED_GCS, PROJECT_CRS import interpolate_stop_arrival diff --git a/rt_segment_speeds/scripts/vp_around_stops.py b/rt_segment_speeds/scripts/vp_around_stops.py index 904896139..9f76378e0 100644 --- a/rt_segment_speeds/scripts/vp_around_stops.py +++ b/rt_segment_speeds/scripts/vp_around_stops.py @@ -14,7 +14,8 @@ from pathlib import Path from typing import Literal, Optional -from segment_speed_utils import helpers, wrangle_shapes +from segment_speed_utils import helpers +from shared_utils import geo_utils from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from segment_speed_utils.project_vars import SEGMENT_TYPES, PROJECT_CRS @@ -115,7 +116,7 @@ def get_vp_projected_against_shape( f"{SEGMENT_GCS}{input_file}_{analysis_date}", columns = ["trip_instance_key", "vp_idx", "x", "y"], **kwargs - ).pipe(wrangle_shapes.vp_as_gdf, crs = PROJECT_CRS) + ).pipe(geo_utils.vp_as_gdf, crs = PROJECT_CRS) # Merge all together so we can project vp point goem # against shape line geom diff --git a/rt_segment_speeds/segment_speed_utils/__init__.py b/rt_segment_speeds/segment_speed_utils/__init__.py index b7e75b05e..4d10f7d0e 100644 --- a/rt_segment_speeds/segment_speed_utils/__init__.py +++ b/rt_segment_speeds/segment_speed_utils/__init__.py @@ -7,10 +7,8 @@ parallel_corridors, project_vars, segment_calcs, - time_helpers, time_series_utils, vp_transform, - wrangle_shapes, ) __all__ = [ @@ -22,8 +20,6 @@ "parallel_corridors", "project_vars", "segment_calcs", - "time_helpers", "time_series_utils", "vp_transform", - "wrangle_shapes", ] \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py b/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py index d73c378bd..da2ac2b2c 100644 --- a/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py +++ b/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py @@ -1,14 +1,14 @@ """ All kinds of GTFS schedule table wrangling. """ +import dask.dataframe as dd import geopandas as gpd import pandas as pd -import dask.dataframe as dd from typing import Literal, Union -from segment_speed_utils import helpers, time_helpers -from shared_utils import portfolio_utils, rt_utils +from segment_speed_utils import helpers +from shared_utils import portfolio_utils, rt_utils, time_helpers from segment_speed_utils.project_vars import SEGMENT_GCS sched_rt_category_dict = { diff --git a/rt_segment_speeds/segment_speed_utils/neighbor.py b/rt_segment_speeds/segment_speed_utils/neighbor.py index ec18fe074..54ee5d8d4 100644 --- a/rt_segment_speeds/segment_speed_utils/neighbor.py +++ b/rt_segment_speeds/segment_speed_utils/neighbor.py @@ -6,41 +6,11 @@ import pandas as pd import shapely -from scipy.spatial import KDTree - from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import gtfs_schedule_wrangling, wrangle_shapes +from segment_speed_utils import gtfs_schedule_wrangling from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT +from shared_utils import geo_utils -# Could we use distance to filter for nearest neighbor? 
-# It can make the length of results more unpredictable...maybe we stick to -# k_neighbors and keep the nearest k, so that we can at least be -# more consistent with the arrays returned -geo_const_meters = 6_371_000 * np.pi / 180 -geo_const_miles = 3_959_000 * np.pi / 180 - -def nearest_snap( - line: shapely.LineString, - point: shapely.Point, - k_neighbors: int = 1 -) -> np.ndarray: - """ - Based off of this function, - but we want to return the index value, rather than the point. - https://github.com/UTEL-UIUC/gtfs_segments/blob/main/gtfs_segments/geom_utils.py - """ - line = np.asarray(line.coords) - point = np.asarray(point.coords) - tree = KDTree(line) - - # np_dist is array of distances of result (let's not return it) - # np_inds is array of indices of result - _, np_inds = tree.query( - point, workers=-1, k=k_neighbors, - ) - - return np_inds.squeeze() - def add_nearest_vp_idx( vp_linestring: shapely.LineString, @@ -51,7 +21,7 @@ def add_nearest_vp_idx( Index into where the nearest vp is to the stop, and return that vp_idx value from the vp_idx array. """ - idx = nearest_snap(vp_linestring, stop, k_neighbors=1) + idx = geo_utils.nearest_snap(vp_linestring, stop, k_neighbors=1) return vp_idx_arr[idx] @@ -105,7 +75,7 @@ def add_nearest_neighbor_result_array( stop_geometry = getattr(row, "stop_geometry") vp_idx_arr = getattr(row, "vp_idx") - np_inds = nearest_snap( + np_inds = geo_utils.nearest_snap( vp_coords_line, stop_geometry, N_NEAREST_POINTS ) diff --git a/rt_segment_speeds/segment_speed_utils/segment_calcs.py b/rt_segment_speeds/segment_speed_utils/segment_calcs.py index 46f33e047..9bfac4613 100644 --- a/rt_segment_speeds/segment_speed_utils/segment_calcs.py +++ b/rt_segment_speeds/segment_speed_utils/segment_calcs.py @@ -134,3 +134,19 @@ def get_usable_vp_bounds_by_trip(df: dd.DataFrame) -> pd.DataFrame: ).reset_index(drop=True).compute() return df2 + + +def interpolate_stop_arrival_time( + stop_position: float, + shape_meters_arr: np.ndarray, + timestamp_arr: np.ndarray +) -> float: + """ + Interpolate the arrival time given the stop meters position. + Cast datetimes into floats and cast back as datetime. + """ + timestamp_arr = np.asarray(timestamp_arr).astype("datetime64[s]").astype("float64") + + return np.interp( + stop_position, np.asarray(shape_meters_arr), timestamp_arr + ).astype("datetime64[s]") \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/time_helpers.py b/rt_segment_speeds/segment_speed_utils/time_helpers.py deleted file mode 100644 index af74842ae..000000000 --- a/rt_segment_speeds/segment_speed_utils/time_helpers.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Helpers for defining peak vs offpeak periods and -weekend and weekends so we can aggregate our -existing time-of-day bins. 
-""" -import datetime -import pandas as pd - -PEAK_PERIODS = ["AM Peak", "PM Peak"] - -HOURS_BY_TIME_OF_DAY = { - "Owl": 4, #[0, 3] - "Early AM": 3, #[4, 6] - "AM Peak": 3, #[7, 9] - "Midday": 5, #[10, 14] - "PM Peak": 5, #[15, 19] - "Evening": 4 #[20, 23] -} - -TIME_OF_DAY_DICT = { - **{k: "peak" for k, v in HOURS_BY_TIME_OF_DAY.items() - if k in PEAK_PERIODS}, - **{k: "offpeak" for k, v in HOURS_BY_TIME_OF_DAY.items() - if k not in PEAK_PERIODS} -} - -DAY_TYPE_DICT = { - 1: "Sunday", - 2: "Monday", - 3: "Tuesday", - 4: "Wednesday", - 5: "Thursday", - 6: "Friday", - 7: "Saturday", -} - -WEEKDAY_DICT = { - **{k: "weekday" for k in ["Monday", "Tuesday", "Wednesday", - "Thursday", "Friday"]}, - **{k: "weekend" for k in ["Saturday", "Sunday"]} -} - -def time_span_labeling(date_list: list) -> tuple[str]: - """ - If we grab a week's worth of trips, we'll - use this week's average to stand-in for the entire month. - Label with month and year. - """ - time_span_str = list(set( - [datetime.datetime.strptime(d, "%Y-%m-%d").strftime("%b%Y").lower() - for d in date_list] - )) - - time_span_num = list(set( - [datetime.datetime.strptime(d, "%Y-%m-%d").strftime("%m_%Y").lower() - for d in date_list] - )) - - if len(time_span_str) == 1: - return time_span_str[0], time_span_num[0] - - else: - print(f"multiple months: {time_span_str}") - return time_span_str, time_span_num - - -def add_time_span_columns( - df: pd.DataFrame, - time_span_num: str -) -> pd.DataFrame: - - month = int(time_span_num.split('_')[0]) - year = int(time_span_num.split('_')[1]) - - # Downgrade some dtypes for public bucket - df = df.assign( - month = month, - year = year, - ).astype({ - "month": "int16", - "year": "int16", - }) - - return df \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/vp_transform.py b/rt_segment_speeds/segment_speed_utils/vp_transform.py index 32a157a63..154a5b040 100644 --- a/rt_segment_speeds/segment_speed_utils/vp_transform.py +++ b/rt_segment_speeds/segment_speed_utils/vp_transform.py @@ -4,7 +4,16 @@ import shapely from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import wrangle_shapes + +ALL_DIRECTIONS = ["Northbound", "Southbound", "Eastbound", "Westbound"] + +OPPOSITE_DIRECTIONS = { + "Northbound": "Southbound", + "Southbound": "Northbound", + "Eastbound": "Westbound", + "Westbound": "Eastbound", + "Unknown": "", +} def condense_point_geom_to_line( df: pd.DataFrame, @@ -67,7 +76,7 @@ def combine_valid_vp_for_direction( direction: str ) -> gpd.GeoDataFrame: - opposite_direction = wrangle_shapes.OPPOSITE_DIRECTIONS[direction] + opposite_direction = OPPOSITE_DIRECTIONS[direction] coords_series = [] vp_idx_series = [] diff --git a/rt_segment_speeds/segment_speed_utils/wrangle_shapes.py b/rt_segment_speeds/segment_speed_utils/wrangle_shapes.py deleted file mode 100644 index 6becb9703..000000000 --- a/rt_segment_speeds/segment_speed_utils/wrangle_shapes.py +++ /dev/null @@ -1,195 +0,0 @@ -""" -Functions for applying shapely project and interpolation. -Move our shapes (linestrings) and stops (points) from coordinates -to numpy arrays with numeric values (shape_meters) and vice versa. 
- -References: -* Tried method 4: https://gis.stackexchange.com/questions/203048/split-lines-at-points-using-shapely -- debug because we lost curves -* https://stackoverflow.com/questions/31072945/shapely-cut-a-piece-from-a-linestring-at-two-cutting-points -* https://gis.stackexchange.com/questions/210220/break-a-shapely-linestring-at-multiple-points -* https://gis.stackexchange.com/questions/416284/splitting-multiline-or-linestring-into-equal-segments-of-particular-length-using -* https://stackoverflow.com/questions/62053253/how-to-split-a-linestring-to-segments -""" -import geopandas as gpd -import numpy as np -import pandas as pd -import shapely - -from typing import Literal - -from calitp_data_analysis import geography_utils -from shared_utils import rt_utils -from segment_speed_utils.project_vars import PROJECT_CRS - -ALL_DIRECTIONS = ["Northbound", "Southbound", "Eastbound", "Westbound"] - -OPPOSITE_DIRECTIONS = { - "Northbound": "Southbound", - "Southbound": "Northbound", - "Eastbound": "Westbound", - "Westbound": "Eastbound", - "Unknown": "", -} - -def interpolate_projected_points( - shape_geometry: shapely.geometry.LineString, - projected_list: list -): - return [shape_geometry.interpolate(i) for i in projected_list] - - -def project_list_of_coords( - shape_geometry: shapely.geometry.LineString, - point_geom_list: list = [], - use_shapely_coords: bool = False -) -> np.ndarray: - if use_shapely_coords: - # https://stackoverflow.com/questions/49330030/remove-a-duplicate-point-from-polygon-in-shapely - # use simplify(0) to remove any points that might be duplicates - return np.asarray( - [shape_geometry.project(shapely.geometry.Point(p)) - for p in shape_geometry.simplify(0).coords]) - else: - return np.asarray( - [shape_geometry.project(i) for i in point_geom_list]) - - -def add_arrowized_geometry(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """ - Add a column where the segment is arrowized. - """ - - segment_geom = gpd.GeoSeries(gdf.geometry) - CRS = gdf.crs.to_epsg() - - #TODO: parallel_offset is going to be deprecated? offset_curve is the new one - geom_parallel = gpd.GeoSeries( - [rt_utils.try_parallel(i) for i in segment_geom], crs=CRS) - #geom_parallel = gpd.GeoSeries( - # [i.offset_curve(30) for i in segment_geom], - # crs=CRS - #) - - geom_arrowized = rt_utils.arrowize_segment( - geom_parallel, - buffer_distance = 20 - ) - - gdf = gdf.assign( - geometry_arrowized = geom_arrowized - ) - - return gdf - - -def array_to_geoseries( - array: np.ndarray, - geom_type: Literal["point", "line", "polygon"], - crs: str = "EPSG:3310" -)-> gpd.GeoSeries: - """ - Turn array back into geoseries. - """ - if geom_type == "point": - gdf = gpd.GeoSeries(array, crs=crs) - - elif geom_type == "line": - gdf = gpd.GeoSeries( - shapely.geometry.LineString(array), - crs=crs) - - elif geom_type == "polygon": - gdf = gpd.GeoSeries( - shapely.geometry.Polygon(array), - crs = crs) - - return gdf - - -def get_direction_vector( - start: shapely.geometry.Point, - end: shapely.geometry.Point -) -> tuple: - """ - Given 2 points (in a projected CRS...not WGS84), return a - tuple that shows (delta_x, delta_y). - - https://www.varsitytutors.com/precalculus-help/find-a-direction-vector-when-given-two-points - https://stackoverflow.com/questions/17332759/finding-vectors-with-2-points - - """ - return ((end.x - start.x), (end.y - start.y)) - - -def distill_array_into_direction_vector(array: np.ndarray) -> tuple: - """ - Given an array of n items, let's take the start/end of that. 
- From start/end, we can turn 2 coordinate points into 1 distance vector. - Distance vector is a tuple that equals (delta_x, delta_y). - """ - origin = array[0] - destination = array[-1] - return get_direction_vector(origin, destination) - - -def get_vector_norm(vector: tuple) -> float: - """ - Get the length (off of Pythagorean Theorem) by summing up - the squares of the components and then taking square root. - - Use Pythagorean Theorem to get unit vector. Divide the vector - by the length of the vector to get unit/normalized vector. - This equation tells us what we need to divide by. - """ - return np.sqrt(vector[0]**2 + vector[1]**2) - - -def get_normalized_vector(vector: tuple) -> tuple: - """ - Apply Pythagorean Theorem and normalize the vector of distances. - https://stackoverflow.com/questions/21030391/how-to-normalize-a-numpy-array-to-a-unit-vector - """ - x_norm = vector[0] / get_vector_norm(vector) - y_norm = vector[1] / get_vector_norm(vector) - - return (x_norm, y_norm) - - -def dot_product(vec1: tuple, vec2: tuple) -> float: - """ - Take the dot product. Multiply the x components, the y components, and - sum it up. - """ - return vec1[0]*vec2[0] + vec1[1]*vec2[1] - - -def vp_as_gdf( - vp: pd.DataFrame, - crs: str = PROJECT_CRS -) -> gpd.GeoDataFrame: - """ - Turn vp as a gdf and project to EPSG:3310. - """ - vp_gdf = geography_utils.create_point_geometry( - vp, - longitude_col = "x", latitude_col = "y", - crs = geography_utils.WGS84 - ).to_crs(crs).drop(columns = ["x", "y"]) - - return vp_gdf - - -def interpolate_stop_arrival_time( - stop_position: float, - shape_meters_arr: np.ndarray, - timestamp_arr: np.ndarray -) -> float: - """ - Interpolate the arrival time given the stop meters position. - Cast datetimes into floats and cast back as datetime. - """ - timestamp_arr = np.asarray(timestamp_arr).astype("datetime64[s]").astype("float64") - - return np.interp( - stop_position, np.asarray(shape_meters_arr), timestamp_arr - ).astype("datetime64[s]") \ No newline at end of file diff --git a/rt_segment_speeds/setup.py b/rt_segment_speeds/setup.py index 00030a4e9..be6555fa4 100644 --- a/rt_segment_speeds/setup.py +++ b/rt_segment_speeds/setup.py @@ -3,7 +3,7 @@ setup( name="segment_speed_utils", packages=find_packages(), - version="1.5", + version="1.6", description="Utility functions for GTFS RT segment speeds", author="Cal-ITP", license="Apache",
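
Usage sketch for the relocated helpers (hypothetical inputs; assumes shared_utils 3.0 is installed):

    import pandas as pd
    import shapely
    from shared_utils import geo_utils, time_helpers

    # index of the line vertex nearest to the point: 1, i.e. (10, 0)
    line = shapely.LineString([(0, 0), (10, 0), (20, 0)])
    idx = geo_utils.nearest_snap(line, shapely.Point(11, 1), k_neighbors=1)

    # cut the line between vertex indices 0 and 1 (inclusive)
    segment = geo_utils.segmentize_by_indices(line, 0, 1)

    # stamp an aggregated time-series with month=4, year=2024 (int16)
    df = time_helpers.add_time_span_columns(
        pd.DataFrame({"speed_mph": [12.3]}), time_span_num="04_2024"
    )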