Skip to content

Commit

Permalink
Merge pull request #1319 from cal-itp/remove-vp-nn-output
Browse files Browse the repository at this point in the history
Remove vp nn output
  • Loading branch information
tiffanychu90 authored Dec 9, 2024
2 parents cbac204 + ebd0676 commit 323bbed
Show file tree
Hide file tree
Showing 10 changed files with 575 additions and 161 deletions.
2 changes: 1 addition & 1 deletion _shared_utils/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-e .
altair==5.3.0
altair-transform==0.2.0
gtfs-segments==0.1.0
gtfs-segments==2.1.7
pyairtable==2.2.2
great_tables==0.14.0
omegaconf==2.3.0 # better yaml configuration
Expand Down
9 changes: 7 additions & 2 deletions _shared_utils/shared_utils/geo_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Geospatial utility functions
"""
from typing import Union

import geopandas as gpd
import numpy as np
import pandas as pd
Expand All @@ -17,13 +19,16 @@
geo_const_miles = 3_959_000 * np.pi / 180


def nearest_snap(line: shapely.LineString, point: shapely.Point, k_neighbors: int = 1) -> np.ndarray:
def nearest_snap(line: Union[shapely.LineString, np.ndarray], point: shapely.Point, k_neighbors: int = 1) -> np.ndarray:
"""
Based off of this function,
but we want to return the index value, rather than the point.
https://github.com/UTEL-UIUC/gtfs_segments/blob/main/gtfs_segments/geom_utils.py
"""
line = np.asarray(line.coords)
if isinstance(line, shapely.LineString):
line = np.asarray(line.coords)
elif isinstance(line, np.ndarray):
line = line
point = np.asarray(point.coords)
tree = KDTree(line)

Expand Down
1 change: 0 additions & 1 deletion _shared_utils/shared_utils/gtfs_analytics_data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ speeds_tables:
usable_vp: vp_usable
vp_dwell: vp_usable_dwell
vp_condensed_line: condensed/vp_condensed
vp_nearest_neighbor: condensed/vp_nearest_neighbor
timestamp_col: ${speed_vars.timestamp_col}
time_min_cutoff: ${speed_vars.time_min_cutoff}

Expand Down
57 changes: 3 additions & 54 deletions gtfs_funnel/vp_condenser.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,49 +58,6 @@ def condense_vp_to_linestring(
return


def prepare_vp_for_all_directions(
    analysis_date: str,
    dict_inputs: dict
) -> None:
    """
    For each direction, exclude the opposite direction and
    save out the arrays of valid indices.
    Every trip will have 4 rows, 1 row corresponding to each direction.
    Ex: for a given trip's northbound points, exclude southbound vp.
    Subset vp_idx, location_timestamp_local and coordinate arrays
    to exclude southbound.

    Reads the condensed vp parquet for `analysis_date`, builds one
    result per entry in `vp_transform.ALL_DIRECTIONS`, concatenates them,
    and exports the combined table as a geoparquet. Nothing is returned;
    the exported file is the only output.

    Args:
        analysis_date: date string used as the suffix of both the
            input and the exported file names.
        dict_inputs: configuration object; `speeds_tables.vp_condensed_line`
            and `speeds_tables.vp_nearest_neighbor` are read by attribute
            access, so a plain `dict` would not work here
            (NOTE(review): presumably an OmegaConf-style config -- confirm).
    """
    INPUT_FILE = dict_inputs.speeds_tables.vp_condensed_line
    EXPORT_FILE = dict_inputs.speeds_tables.vp_nearest_neighbor

    # Wrapped in `delayed`, so the parquet is not read until a
    # downstream task is computed.
    vp = delayed(gpd.read_parquet)(
        f"{SEGMENT_GCS}{INPUT_FILE}_{analysis_date}.parquet",
    )

    # One delayed task per direction, all depending on the same `vp` input.
    dfs = [
        delayed(vp_transform.combine_valid_vp_for_direction)(
            vp, direction)
        for direction in vp_transform.ALL_DIRECTIONS
    ]

    # Each task is computed on its own; `compute` returns a tuple,
    # so take the first (only) element.
    results = [compute(i)[0] for i in dfs]

    # Stack the per-direction results and order rows by trip, then direction.
    gdf = pd.concat(
        results, axis=0, ignore_index=True
    ).sort_values(
        ["trip_instance_key", "vp_primary_direction"]
    ).reset_index(drop=True)

    utils.geoparquet_gcs_export(
        gdf,
        SEGMENT_GCS,
        f"{EXPORT_FILE}_{analysis_date}"
    )

    return


if __name__ == "__main__":

from update_vars import analysis_date_list
Expand All @@ -116,17 +73,9 @@ def prepare_vp_for_all_directions(

condense_vp_to_linestring(analysis_date, GTFS_DATA_DICT)

time1 = datetime.datetime.now()
end = datetime.datetime.now()

logger.info(
f"{analysis_date}: condense vp for trip "
f"{time1 - start}"
)

prepare_vp_for_all_directions(analysis_date, GTFS_DATA_DICT)

end = datetime.datetime.now()
logger.info(
f"{analysis_date}: prepare vp to use in nearest neighbor: "
f"{end - time1}"
)
f"{end - start}"
)
Loading

0 comments on commit 323bbed

Please sign in to comment.