Skip to content

Commit

Permalink
Merge pull request #1330 from cal-itp/dec-open-data
Browse files — browse the repository at this point in the history
Dec open data
  • Loading branch information
edasmalchi authored Dec 20, 2024
2 parents 2b31a51 + 7848fe1 commit 5536a8e
Show file tree
Hide file tree
Showing 143 changed files with 930 additions and 877 deletions.
1 change: 1 addition & 0 deletions _shared_utils/shared_utils/rt_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
"oct2024f": "2024-10-20",
"oct2024g": "2024-10-21", # additional one-off to capture Amtrak in HQTA
"nov2024": "2024-11-13",
"dec2024": "2024-12-11"
}

y2023_dates = [
Expand Down
2 changes: 1 addition & 1 deletion ca_transit_speed_maps/update_vars_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

catalog = catalog_utils.get_catalog('gtfs_analytics_data')

ANALYSIS_DATE = dt.date.fromisoformat(rt_dates.DATES['nov2024'])
ANALYSIS_DATE = dt.date.fromisoformat(rt_dates.DATES['dec2024'])
PROGRESS_PATH = f'./_rt_progress_{ANALYSIS_DATE}.parquet'
GEOJSON_SUBFOLDER = f'segment_speeds_{ANALYSIS_DATE}/'

Expand Down
2 changes: 1 addition & 1 deletion gtfs_funnel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ all:
# update open_data/update_vars.py
cd ../open_data/ && make create_gtfs_schedule_geospatial_open_data -f Makefile
# update high_quality_transit_areas/update_vars.py
# cd ../high_quality_transit_areas/ && make hqta_data -f Makefile
cd ../high_quality_transit_areas/ && make hqta_data -f Makefile
# update rt_segment_speeds/segment_speed_utils/project_vars.py
cd ../rt_segment_speeds/scripts/ && make all_speeds_pipeline -f Makefile && cd ../../
# update rt_scheduled_v_ran/scripts/update_vars.py
Expand Down
6 changes: 3 additions & 3 deletions gtfs_funnel/crosswalk_gtfs_dataset_key_to_organization.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,19 @@ def load_ntd(year: int) -> pd.DataFrame:
Select certain columns.
"""
df = (
tbls.mart_ntd.dim_annual_ntd_agency_information()
tbls.mart_ntd.dim_annual_agency_information()
>> filter(_.year == year, _.state == "CA", _._is_current == True)
>> select(
_.number_of_state_counties,
_.uza_name,
_.primary_uza_name,
_.density,
_.number_of_counties_with_service,
_.state_admin_funds_expended,
_.service_area_sq_miles,
_.population,
_.service_area_pop,
_.subrecipient_type,
_.primary_uza,
_.primary_uza_code,
_.reporter_type,
_.organization_type,
_.agency_name,
Expand Down
16 changes: 16 additions & 0 deletions gtfs_funnel/logs/download_data.log
Original file line number Diff line number Diff line change
Expand Up @@ -678,3 +678,19 @@
2024-11-15 10:19:16.119 | INFO | __main__:download_one_day:29 - # operators to run: 194
2024-11-15 10:19:16.120 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
2024-11-15 10:21:15.285 | INFO | __main__:download_one_day:56 - execution time: 0:02:00.819066
2024-12-17 12:59:10.547 | INFO | __main__:download_one_day:45 - Analysis date: 2024-12-11
2024-12-17 12:59:14.245 | INFO | __main__:download_one_day:52 - # operators to run: 226
2024-12-17 12:59:14.249 | INFO | __main__:download_one_day:56 - *********** Download trips data ***********
2024-12-17 12:59:49.103 | INFO | __main__:download_one_day:86 - execution time: 0:00:38.555174
2024-12-17 13:00:06.712 | INFO | __main__:download_one_day:22 - Analysis date: 2024-12-11
2024-12-17 13:00:09.087 | INFO | __main__:download_one_day:29 - # operators to run: 226
2024-12-17 13:00:09.087 | INFO | __main__:download_one_day:33 - *********** Download stops data ***********
2024-12-17 13:00:20.110 | INFO | __main__:download_one_day:64 - execution time: 0:00:13.397006
2024-12-17 13:00:36.224 | INFO | __main__:download_one_day:22 - Analysis date: 2024-12-11
2024-12-17 13:00:38.381 | INFO | __main__:download_one_day:29 - # operators to run: 226
2024-12-17 13:00:38.382 | INFO | __main__:download_one_day:33 - *********** Download routelines data ***********
2024-12-17 13:02:42.410 | INFO | __main__:download_one_day:63 - execution time: 0:02:06.185085
2024-12-17 13:02:59.291 | INFO | __main__:download_one_day:21 - Analysis date: 2024-12-11
2024-12-17 13:03:01.386 | INFO | __main__:download_one_day:29 - # operators to run: 194
2024-12-17 13:03:01.387 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
2024-12-17 13:04:59.321 | INFO | __main__:download_one_day:56 - execution time: 0:02:00.030094
11 changes: 11 additions & 0 deletions gtfs_funnel/logs/download_vp_v2.log
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,14 @@
2024-11-15 10:35:51.779 | INFO | __main__:<module>:112 - export concatenated vp: 0:03:55.144496
2024-11-15 10:39:51.367 | INFO | __main__:<module>:134 - remove batched parquets
2024-11-15 10:39:51.367 | INFO | __main__:<module>:137 - execution time: 0:08:04.043473
2024-12-17 13:05:19.418 | INFO | __main__:<module>:148 - Analysis date: 2024-12-11
2024-12-17 13:07:43.522 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 0 to GCS: 0:02:24.103479
2024-12-17 13:08:53.304 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 1 to GCS: 0:01:09.780570
2024-12-17 13:12:58.734 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 2 to GCS: 0:04:05.429329
2024-12-17 13:14:54.294 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 3 to GCS: 0:01:55.559501
2024-12-17 13:14:54.295 | INFO | __main__:<module>:155 - execution time: 0:09:34.875675
2024-12-17 13:15:11.818 | INFO | __main__:<module>:97 - Analysis date: 2024-12-11
2024-12-17 13:15:19.487 | INFO | __main__:<module>:105 - concat and filter batched data: 0:00:07.667599
2024-12-17 13:19:32.102 | INFO | __main__:<module>:112 - export concatenated vp: 0:04:12.615694
2024-12-17 13:23:34.976 | INFO | __main__:<module>:134 - remove batched parquets
2024-12-17 13:23:34.976 | INFO | __main__:<module>:137 - execution time: 0:08:23.157313
10 changes: 10 additions & 0 deletions gtfs_funnel/logs/vp_preprocessing.log
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,13 @@
2024-11-15 11:07:16.370 | INFO | __main__:<module>:236 - vp with dwell time 2024-11-13: 0:06:07.784141
2024-11-15 11:13:16.502 | INFO | __main__:<module>:121 - 2024-11-13: condense vp for trip 0:05:43.170466
2024-11-15 11:25:18.878 | INFO | __main__:<module>:129 - 2024-11-13: prepare vp to use in nearest neighbor: 0:12:02.376671
2024-12-17 13:50:53.303 | INFO | __main__:<module>:169 - 2024-12-11: pare down vp: 0:02:22.877788
2024-12-17 13:55:36.457 | INFO | __main__:attach_prior_vp_add_direction:90 - persist vp gddf: 0:04:24.416660
2024-12-17 13:59:57.106 | INFO | __main__:attach_prior_vp_add_direction:122 - np vectorize arrays for direction: 0:04:20.649389
2024-12-17 14:00:04.653 | INFO | __main__:<module>:194 - 2024-12-11: export vp direction: 0:08:52.613283
2024-12-17 14:01:30.377 | INFO | __main__:<module>:200 - 2024-12-11: export usable vp with direction: 0:01:25.723882
2024-12-17 14:01:30.378 | INFO | __main__:<module>:203 - 2024-12-11: vp_direction script execution time: 0:10:18.337165
2024-12-17 14:06:53.796 | INFO | __main__:<module>:213 - compute dwell df: 0:04:47.332255
2024-12-17 14:08:13.713 | INFO | __main__:<module>:235 - merge with original and export: 0:01:19.916578
2024-12-17 14:08:13.714 | INFO | __main__:<module>:236 - vp with dwell time 2024-12-11: 0:06:07.248833
2024-12-17 14:13:50.962 | INFO | __main__:<module>:78 - 2024-12-11: condense vp for trip 0:05:19.406518
2 changes: 1 addition & 1 deletion gtfs_funnel/update_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
)


analysis_date_list = [rt_dates.DATES["nov2024"]]
analysis_date_list = [rt_dates.DATES["dec2024"]]
# analysis_date_list = all_dates
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

Expand Down
97 changes: 80 additions & 17 deletions high_quality_transit_areas/09_enforce_collinearity.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,37 @@
"s = pd.Series(share_counts.values())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a987c46-8fcc-40e0-9b47-0bdd3a444666",
"metadata": {},
"outputs": [],
"source": [
"import altair as alt\n",
"# from vega_datasets import data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89753eb6-68e0-402a-bb8e-134649923ef7",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame({'Shared Stop Count':s})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40052a5d-ef99-41a7-88e3-f2524f6c1d21",
"metadata": {},
"outputs": [],
"source": [
"df = df[df['Shared Stop Count'] <= 10]"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -423,7 +454,7 @@
},
"outputs": [],
"source": [
"feed_names_filtered = feed_names >> filter(_.name.str.contains('Sac'))\n",
"feed_names_filtered = feed_names >> filter(_.name.str.contains('VTA'))\n",
"display(feed_names_filtered)\n",
"gtfs_dataset_key = feed_names_filtered.gtfs_dataset_key.iloc[0]"
]
Expand Down Expand Up @@ -875,7 +906,7 @@
"metadata": {},
"outputs": [],
"source": [
"combined_export.to_parquet(f\"{GCS_FILE_PATH}max_arrivals_by_stop.parquet\")"
"# combined_export.to_parquet(f\"{GCS_FILE_PATH}max_arrivals_by_stop.parquet\")"
]
},
{
Expand Down Expand Up @@ -903,7 +934,7 @@
"metadata": {},
"outputs": [],
"source": [
"ms = areas >> filter(_.hqta_type == ('major_stop_bus'))"
"ms = areas >> filter(_.hqta_type.str.contains('major_stop'))"
]
},
{
Expand Down Expand Up @@ -993,36 +1024,48 @@
"metadata": {},
"outputs": [],
"source": [
"one_pts = hqta_points >> filter(_.agency_primary.str.contains('Sacramento'))"
"# one_pts = hqta_points >> filter(_.agency_primary.str.contains('Los Angeles County'))\n",
"one_pts = hqta_points >> filter(_.hqta_details != 'corridor_other_stop')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d643893d-f0a0-4f7c-b617-f193d9e62dd6",
"id": "5c243176-56eb-4ffb-9623-a6b8a009c18b",
"metadata": {},
"outputs": [],
"source": [
"one_pts.explore(column='hqta_type')"
"from calitp_data_analysis.geography_utils import CA_NAD83Albers"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0131bb42-d63b-4142-915f-e4ce29972d35",
"id": "b9252b23-3e3d-4b87-958d-98af1171bdc4",
"metadata": {},
"outputs": [],
"source": [
"areas.to_file('hqta_areas_update.geojson')\n",
"hqta_points.to_file('hqta_points_update.geojson')"
"one_pts = one_pts.to_crs(CA_NAD83Albers)"
]
},
{
"cell_type": "markdown",
"id": "4b351da3-4825-4caa-8333-33386db868f1",
"cell_type": "code",
"execution_count": null,
"id": "fead1157-a0d5-4947-be13-580d6dc6edea",
"metadata": {},
"outputs": [],
"source": [
"one_pts.geometry = one_pts.buffer(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d643893d-f0a0-4f7c-b617-f193d9e62dd6",
"metadata": {},
"outputs": [],
"source": [
"## all bus"
"one_pts.explore(column='hqta_type', tiles=\"Cartodb Positron\")"
]
},
{
Expand All @@ -1032,8 +1075,8 @@
"metadata": {},
"outputs": [],
"source": [
"# all_bus = gpd.read_parquet(f\"{GCS_FILE_PATH}all_bus.parquet\")\n",
"hqta_segments = gpd.read_parquet(f\"{GCS_FILE_PATH}hqta_segments.parquet\")"
"all_bus = gpd.read_parquet(f\"{GCS_FILE_PATH}all_bus.parquet\")\n",
"# hqta_segments = gpd.read_parquet(f\"{GCS_FILE_PATH}hqta_segments.parquet\")"
]
},
{
Expand All @@ -1053,7 +1096,17 @@
"metadata": {},
"outputs": [],
"source": [
"hqta_segments = find_inconclusive_directions(hqta_segments)"
"# hqta_segments = find_inconclusive_directions(hqta_segments)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53421714-f669-4413-8192-b658bc5af47c",
"metadata": {},
"outputs": [],
"source": [
"# hqta_segments"
]
},
{
Expand All @@ -1063,7 +1116,7 @@
"metadata": {},
"outputs": [],
"source": [
"# one = hqta_segments >> filter(_.schedule_gtfs_dataset_key == '70c8a8b71c815224299523bf2115924a')"
"one = all_bus >> filter(_.schedule_gtfs_dataset_key == 'fb467982dcc77a7f9199bebe709bb700')"
]
},
{
Expand All @@ -1073,7 +1126,7 @@
"metadata": {},
"outputs": [],
"source": [
"# one.geometry = one.buffer(35)"
"one.geometry = one.buffer(35)"
]
},
{
Expand All @@ -1086,6 +1139,16 @@
"# one = one >> filter(_.route_id.isin(['228', '227', '062', '061', '056', '106']))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56688d44-b377-4b74-8a32-d7f2d2dff3db",
"metadata": {},
"outputs": [],
"source": [
"one.explore(column='segment_direction')"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
8 changes: 4 additions & 4 deletions high_quality_transit_areas/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
hqta_data:
python rail_ferry_brt_stops.py
python create_hqta_segments.py
python create_aggregate_stop_frequencies.py
python sjoin_stops_to_segments.py
# python rail_ferry_brt_stops.py
# python create_hqta_segments.py
# python create_aggregate_stop_frequencies.py
# python sjoin_stops_to_segments.py
python prep_pairwise_intersections.py
python get_intersections.py
python create_bus_hqta_types.py
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def filter_qualifying_stops(one_stop_st: pd.DataFrame, qualify_pairs: list) -> p
)
this_stop_route_dirs = (one_stop_st >> distinct(_.route_dir, _.route_dir_count)).route_dir.to_numpy() # preserves sort order
aggregation_ok_route_dirs = check_stop(this_stop_route_dirs, qualify_pairs)
return one_stop_df >> filter(_.route_dir.isin(aggregation_ok_route_dirs))
return one_stop_st >> filter(_.route_dir.isin(aggregation_ok_route_dirs))

def collinear_filter_feed(
gtfs_dataset_key: str,
Expand Down
16 changes: 16 additions & 0 deletions high_quality_transit_areas/logs/hqta_processing.log
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,19 @@
2024-12-13 16:11:06.186 | INFO | __main__:<module>:175 - C3_create_bus_hqta_types 2024-10-21 execution time: 0:00:16.642029
2024-12-13 16:11:42.067 | INFO | __main__:<module>:219 - D1_assemble_hqta_points 2024-10-21 execution time: 0:00:19.108129
2024-12-13 16:12:12.529 | INFO | __main__:<module>:168 - D2_assemble_hqta_polygons 2024-10-21 execution time: 0:00:12.842061
2024-12-17 14:38:38.771 | INFO | __main__:<module>:277 - A1_rail_ferry_brt_stops 2024-12-11 execution time: 0:00:19.093442
2024-12-17 14:47:04.101 | INFO | __main__:<module>:248 - B1_create_hqta_segments execution time: 0:08:09.120437
2024-12-17 14:59:29.127 | INFO | __main__:<module>:277 - A1_rail_ferry_brt_stops 2024-12-11 execution time: 0:00:19.427916
2024-12-17 15:05:20.491 | INFO | __main__:<module>:248 - B1_create_hqta_segments execution time: 0:05:35.265734
2024-12-17 15:08:05.518 | INFO | __main__:<module>:333 - B2_create_aggregate_stop_frequencies 2024-12-11 execution time: 0:02:27.789099
2024-12-17 15:08:34.978 | INFO | __main__:<module>:326 - B3_sjoin_stops_to_segments 2024-12-11 execution time: 0:00:12.351944
2024-12-17 15:08:56.152 | INFO | __main__:<module>:147 - C1_prep_pairwise_intersections 2024-12-11 execution time: 0:00:04.328776
2024-12-17 15:09:20.373 | INFO | __main__:<module>:124 - C2_find_intersections 2024-12-11 execution time: 0:00:07.923657
2024-12-17 15:09:48.649 | INFO | __main__:<module>:175 - C3_create_bus_hqta_types 2024-12-11 execution time: 0:00:11.784740
2024-12-17 15:10:23.397 | INFO | __main__:<module>:219 - D1_assemble_hqta_points 2024-12-11 execution time: 0:00:18.075380
2024-12-17 15:10:53.886 | INFO | __main__:<module>:168 - D2_assemble_hqta_polygons 2024-12-11 execution time: 0:00:13.604352
2024-12-19 14:06:19.225 | INFO | __main__:<module>:149 - C1_prep_pairwise_intersections 2024-12-11 execution time: 0:00:04.821081
2024-12-19 14:06:46.848 | INFO | __main__:<module>:124 - C2_find_intersections 2024-12-11 execution time: 0:00:06.916373
2024-12-19 14:07:17.276 | INFO | __main__:<module>:175 - C3_create_bus_hqta_types 2024-12-11 execution time: 0:00:13.000987
2024-12-19 14:07:53.167 | INFO | __main__:<module>:219 - D1_assemble_hqta_points 2024-12-11 execution time: 0:00:18.462037
2024-12-19 14:08:28.603 | INFO | __main__:<module>:168 - D2_assemble_hqta_polygons 2024-12-11 execution time: 0:00:17.899141
9 changes: 7 additions & 2 deletions high_quality_transit_areas/prep_pairwise_intersections.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,27 @@ def sjoin_against_other_operators(
"""
Spatial join of the in group vs the out group.
This could be the operator vs other operators,
or a route vs other routes.
or a route vs other routes. This is currently
east-west vs north-south segments, which requires
the additional step of excluding intersections
resulting from the same route changing direction.
Create a crosswalk / pairwise table showing these links.
Compile all of them, because finding intersections is
computationally expensive,
so we want to do it on fewer rows.
"""
route_cols = ["hqta_segment_id", "segment_direction"]
route_cols = ["hqta_segment_id", "segment_direction", "route_key"]

s1 = gpd.sjoin(
in_group_df[route_cols + ["geometry"]],
out_group_df[route_cols + ["geometry"]],
how = "inner",
predicate = "intersects"
).drop(columns = ["index_right", "geometry"])

s1 = s1[s1["route_key_left"] != s1["route_key_right"]]

route_pairs = (
s1.rename(
Expand Down
2 changes: 1 addition & 1 deletion high_quality_transit_areas/update_vars.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from shared_utils import rt_dates
import datetime as dt

analysis_date = rt_dates.DATES["oct2024g"]
analysis_date = rt_dates.DATES["dec2024"]

GCS_FILE_PATH = ("gs://calitp-analytics-data/data-analyses/"
"high_quality_transit_areas/")
Expand Down
2 changes: 1 addition & 1 deletion open_data/update_vars.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
from shared_utils import catalog_utils, rt_dates

analysis_date = rt_dates.DATES["nov2024"]
analysis_date = rt_dates.DATES["dec2024"]

GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

Expand Down
Loading

0 comments on commit 5536a8e

Please sign in to comment.