Skip to content

Commit

Permalink
Merge pull request #1307 from cal-itp/hermosa-export
Browse files Browse the repository at this point in the history
Hermosa export
  • Loading branch information
edasmalchi authored Nov 26, 2024
2 parents d88e2dd + a7f435c commit c1742ea
Show file tree
Hide file tree
Showing 8 changed files with 366 additions and 2 deletions.
301 changes: 301 additions & 0 deletions ca_transit_speed_maps/03_hermosa_adhoc_export.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 24,
"id": "46898b5c-b5cc-4096-ab68-8c3e42fab870",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"import calitp_data_analysis.magics\n",
"# from update_vars_index import ANALYSIS_DATE\n",
"\n",
"import speedmap_utils\n",
"import pandas as pd\n",
"import geopandas as gpd\n",
"import numpy as np\n",
"from siuba import *\n",
"import shared_utils\n",
"catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data')"
]
},
{
"cell_type": "markdown",
"id": "5f100bb7-182c-4d78-a8f8-1b2eabf70650",
"metadata": {},
"source": [
"## Hermosa ad hoc speedmap export: https://github.com/cal-itp/data-analyses/issues/1306"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9f15ed6a-5ab7-4f57-9695-3f762781b74c",
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"## parameters cell\n",
"organization_source_record_ids = ['rec4pgjrmdhCh4z01', 'rec8zhnCPETu6qEiH', 'recPnGkwdpnr8jmHB',\n",
" 'recvzE9NXgGMmqcTH']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7c53d945-7295-4d86-9110-b6f4f52f9975",
"metadata": {},
"outputs": [],
"source": [
"dates = ['jan2024', 'feb2024', 'mar2024', 'apr2024',\n",
" 'may2024', 'jun2024', 'jul2024', 'aug2024',\n",
" 'sep2024', 'oct2024', 'nov2024']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5843d0b3-a500-4c2e-8a65-bfe2935ce089",
"metadata": {},
"outputs": [],
"source": [
"dates = [shared_utils.rt_dates.DATES[date] for date in dates]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "1989aba3-8cf6-48c4-aceb-2399eea80de2",
"metadata": {},
"outputs": [],
"source": [
"def read_segments(organization_source_record_ids: list, analysis_date, route_short_names: list = None) -> gpd.GeoDataFrame:\n",
"    '''\n",
"    Read the detailed speedmap segments for one analysis date, filtered to the given\n",
"    organizations and routes, then add a date column and a fast/slow speed ratio.\n",
"\n",
"    organization_source_record_ids: organization_source_record_id values to keep\n",
"    analysis_date: date suffix of the parquet file to read; also stored in the date column\n",
"    route_short_names: route_short_name values to keep, defaults to ['232', '109', '438', '13']\n",
"\n",
"    Returns the filtered segments after .round(1). Only segments are returned, no SHN.\n",
"    '''\n",
"    if route_short_names is None:\n",
"        route_short_names = ['232', '109', '438', '13']\n",
"    path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'\n",
"    row_filters = [['organization_source_record_id', 'in', organization_source_record_ids],\n",
"                   ['route_short_name', 'in', route_short_names]]\n",
"    speedmap_segs = gpd.read_parquet(path, filters=row_filters) # aggregated\n",
"    has_nan = (speedmap_segs >> select(-_.route_short_name)).isna().any().any()\n",
"    assert not has_nan, 'no cols besides route_short_name should be nan'\n",
"    speedmap_segs['date'] = analysis_date\n",
"\n",
"    # TODO move upstream and investigate\n",
"    # a zero p20_mph with a positive p80_mph divides to inf; those ratios are replaced with 3\n",
"    fast_slow_ratio = speedmap_segs['p80_mph'] / speedmap_segs['p20_mph']\n",
"    speedmap_segs['fast_slow_ratio'] = fast_slow_ratio.replace(np.inf, 3)\n",
"    return speedmap_segs.round(1)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "c3333da1-b90c-4ed3-8655-cd668ef33ed4",
"metadata": {},
"outputs": [],
"source": [
"gdf = read_segments(organization_source_record_ids, dates[0])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "420b81e4-80db-4385-9961-58007bbdb5b5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-01-17\n",
"2024-02-14\n",
"2024-03-13\n",
"2024-04-17\n",
"2024-05-22\n",
"2024-06-12\n",
"2024-07-17\n",
"2024-08-14\n",
"2024-09-18\n",
"2024-10-16\n",
"2024-11-13\n"
]
}
],
"source": [
"# Read every date first and concatenate once, instead of re-copying the accumulated\n",
"# frame on each iteration. Reversed so the newest date comes first, as before.\n",
"line_frames = []\n",
"for date in dates:\n",
"    print(date)\n",
"    line_frames.append(read_segments(organization_source_record_ids, date))\n",
"lines = pd.concat(line_frames[::-1])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "359dc68a-903b-455f-b419-920fa506ebed",
"metadata": {},
"outputs": [],
"source": [
"def read_process_segments(organization_source_record_ids: list, analysis_date, route_short_names: list = None) -> gpd.GeoDataFrame:\n",
"    '''\n",
"    Read the detailed speedmap segments for one analysis date, filtered to the given\n",
"    organizations and routes, add a date column, and pass the result through\n",
"    speedmap_utils.prepare_segment_gdf.\n",
"\n",
"    organization_source_record_ids: organization_source_record_id values to keep\n",
"    analysis_date: date suffix of the parquet file to read; also stored in the date column\n",
"    route_short_names: route_short_name values to keep, defaults to ['232', '109', '438', '13']\n",
"\n",
"    Returns whatever prepare_segment_gdf returns for the filtered segments. No SHN is returned.\n",
"    '''\n",
"    if route_short_names is None:\n",
"        route_short_names = ['232', '109', '438', '13']\n",
"    path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'\n",
"    row_filters = [['organization_source_record_id', 'in', organization_source_record_ids],\n",
"                   ['route_short_name', 'in', route_short_names]]\n",
"    speedmap_segs = gpd.read_parquet(path, filters=row_filters) # aggregated\n",
"    has_nan = (speedmap_segs >> select(-_.route_short_name)).isna().any().any()\n",
"    assert not has_nan, 'no cols besides route_short_name should be nan'\n",
"    speedmap_segs['date'] = analysis_date\n",
"    return speedmap_utils.prepare_segment_gdf(speedmap_segs)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "bc4bc624-f5d1-42e0-89a1-858671753de6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-01-17\n",
"2024-02-14\n",
"2024-03-13\n",
"2024-04-17\n",
"2024-05-22\n",
"2024-06-12\n",
"2024-07-17\n",
"2024-08-14\n",
"2024-09-18\n",
"2024-10-16\n",
"2024-11-13\n"
]
}
],
"source": [
"# Read every date first and concatenate once, instead of re-copying the accumulated\n",
"# frame on each iteration. Reversed so the newest date comes first, as before.\n",
"polygon_frames = []\n",
"for date in dates:\n",
"    print(date)\n",
"    polygon_frames.append(read_process_segments(organization_source_record_ids, date))\n",
"polygons = pd.concat(polygon_frames[::-1])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "b5786ff5-e33e-4bbd-acc2-b8459f4e1f13",
"metadata": {},
"outputs": [],
"source": [
"polygons.to_file('hermosa_speedmap_polygons.geojson')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "fbd4f2b9-257a-4c60-b9d0-fa51f8fb2982",
"metadata": {},
"outputs": [],
"source": [
"lines.to_file('hermosa_speedmap_lines.geojson')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d26b1dbf-15db-4d28-bac9-f3f3885d717a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 37,
"id": "821bfbee-f2d4-4e06-aec0-aa877d27db4f",
"metadata": {},
"outputs": [],
"source": [
"def write_gz(gdf, path):\n",
"    '''\n",
"    Serialize gdf with its to_json() method and write the UTF-8 encoded\n",
"    result to path as a gzip-compressed file.\n",
"\n",
"    gdf: object exposing to_json() that returns a str\n",
"    path: destination file path, opened in binary write mode (overwrites)\n",
"    '''\n",
"    import gzip  # local import: the notebook-level `import gzip` only runs in a later cell\n",
"\n",
"    geojson_bytes = gdf.to_json().encode(\"utf-8\")\n",
"    with open(path, \"wb\") as writer, gzip.GzipFile(fileobj=writer, mode=\"w\") as gz:\n",
"        gz.write(geojson_bytes)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "7733fb48-5403-43b7-9ba2-a86992d1f79a",
"metadata": {},
"outputs": [],
"source": [
"import gzip"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "4f1776af-65b8-413d-8d02-5aa66f0671b0",
"metadata": {},
"outputs": [],
"source": [
"write_gz(polygons, 'hermosa_speedmap_polygons.geojson.gz')"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "88f8b256-4bce-4cc4-b878-d1fe1c512100",
"metadata": {},
"outputs": [],
"source": [
"write_gz(lines, 'hermosa_speedmap_lines.geojson.gz')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion ca_transit_speed_maps/speedmap_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def read_segments_shn(organization_source_record_id: str) -> (gpd.GeoDataFrame,
path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{update_vars_index.ANALYSIS_DATE}.parquet'
# path = f'{catalog.stop_segments.dir}{catalog.stop_segments.route_dir_single_segment_detail}_{update_vars_index.ANALYSIS_DATE}.parquet'
speedmap_segs = gpd.read_parquet(path, filters=[['organization_source_record_id', '==', organization_source_record_id]]) # aggregated
assert (speedmap_segs >> select(-_.route_short_name)).isna().any().any() == False, 'no cols besides route_short_name should be nan'x
assert (speedmap_segs >> select(-_.route_short_name)).isna().any().any() == False, 'no cols besides route_short_name should be nan'
speedmap_segs = prepare_segment_gdf(speedmap_segs)
shn = gpd.read_parquet(rt_utils.SHN_PATH)
this_shn = shn >> filter(_.District.isin([int(x[:2]) for x in speedmap_segs.caltrans_district.unique()]))
Expand Down
21 changes: 21 additions & 0 deletions rt_segment_speeds/logs/avg_speeds.log
Original file line number Diff line number Diff line change
Expand Up @@ -557,3 +557,24 @@
2024-11-15 16:47:26.586 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-11-13'] execution time: 0:04:17.582380
2024-11-15 16:47:58.691 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:15.263016
2024-11-15 16:48:12.028 | INFO | __main__:summary_average_speeds:155 - rt_stop_times summary speed averaging for ['2024-11-13'] execution time: 0:00:28.600051
2024-11-25 15:13:37.373 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-02-14'] execution time: 0:03:26.605094
2024-11-25 15:17:40.046 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-02-14'] execution time: 0:04:02.591857
2024-11-25 15:56:22.157 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:14.429525
2024-11-25 15:56:33.970 | INFO | __main__:summary_average_speeds:155 - rt_stop_times summary speed averaging for ['2024-02-14'] execution time: 0:00:26.242649
2024-11-25 16:13:02.901 | INFO | average_segment_speeds:segment_averages_detail:247 - speedmap_segments detailed segment averaging for ['2024-02-14'] execution time: 0:05:49.602618
2024-11-25 16:18:14.085 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-02-14'] execution time: 0:05:11.036585
2024-11-25 16:22:17.639 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-02-14'] execution time: 0:04:03.406461
2024-11-25 17:36:36.926 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-04-17'] execution time: 0:03:37.971380
2024-11-25 17:40:46.424 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-04-17'] execution time: 0:04:09.371271
2024-11-25 18:21:02.631 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:15.281556
2024-11-25 18:21:14.991 | INFO | __main__:summary_average_speeds:155 - rt_stop_times summary speed averaging for ['2024-04-17'] execution time: 0:00:27.640841
2024-11-25 18:38:30.685 | INFO | average_segment_speeds:segment_averages_detail:247 - speedmap_segments detailed segment averaging for ['2024-04-17'] execution time: 0:05:56.867437
2024-11-25 18:43:43.127 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-04-17'] execution time: 0:05:12.297407
2024-11-25 18:47:46.966 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-04-17'] execution time: 0:04:03.631509
2024-11-25 20:16:48.107 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-07-17'] execution time: 0:03:01.260045
2024-11-25 20:20:35.395 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-07-17'] execution time: 0:03:47.132474
2024-11-25 20:58:10.113 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:14.389431
2024-11-25 20:58:21.747 | INFO | __main__:summary_average_speeds:155 - rt_stop_times summary speed averaging for ['2024-07-17'] execution time: 0:00:26.022796
2024-11-25 21:14:33.232 | INFO | average_segment_speeds:segment_averages_detail:247 - speedmap_segments detailed segment averaging for ['2024-07-17'] execution time: 0:05:37.386735
2024-11-25 21:19:21.360 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-07-17'] execution time: 0:04:47.990211
2024-11-25 21:23:05.965 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-07-17'] execution time: 0:03:44.478500
6 changes: 6 additions & 0 deletions rt_segment_speeds/logs/cut_stop_segments.log
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,9 @@
2024-11-15 12:05:36.298 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-11-13: 0:05:14.586933
2024-11-25 11:51:31.638 | INFO | __main__:<module>:155 - cut segments 2024-01-17: 0:21:23.110335
2024-11-25 11:57:30.752 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-01-17: 0:04:25.925486
2024-11-25 14:26:37.567 | INFO | __main__:<module>:155 - cut segments 2024-02-14: 0:20:14.982057
2024-11-25 14:32:07.865 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-02-14: 0:04:06.440641
2024-11-25 16:46:11.154 | INFO | __main__:<module>:155 - cut segments 2024-04-17: 0:21:24.813915
2024-11-25 16:52:05.319 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-04-17: 0:04:28.728100
2024-11-25 19:29:33.128 | INFO | __main__:<module>:155 - cut segments 2024-07-17: 0:19:53.239259
2024-11-25 19:35:15.706 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-07-17: 0:04:18.392759
9 changes: 9 additions & 0 deletions rt_segment_speeds/logs/interpolate_stop_arrival.log
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,12 @@
2024-11-15 12:44:55.085 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-11-13: 2024-11-13: 0:15:15.789486
2024-11-15 13:34:38.198 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-11-13: 2024-11-13: 0:15:41.682831
2024-11-15 13:46:49.459 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-11-13: 2024-11-13: 0:03:04.062272
2024-11-25 15:08:19.483 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-02-14: 2024-02-14: 0:14:15.053152
2024-11-25 15:54:03.502 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-02-14: 2024-02-14: 0:14:19.447104
2024-11-25 16:04:58.835 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-02-14: 2024-02-14: 0:02:41.448366
2024-11-25 17:31:02.567 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-04-17: 2024-04-17: 0:15:55.554641
2024-11-25 18:18:13.220 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-04-17: 2024-04-17: 0:14:30.258514
2024-11-25 18:30:15.050 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-04-17: 2024-04-17: 0:02:50.595931
2024-11-25 20:11:30.072 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-07-17: 2024-07-17: 0:14:23.629904
2024-11-25 20:55:50.314 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-07-17: 2024-07-17: 0:13:51.485049
2024-11-25 21:06:50.404 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-07-17: 2024-07-17: 0:02:33.588358
18 changes: 18 additions & 0 deletions rt_segment_speeds/logs/nearest_vp.log
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,21 @@
2024-11-15 13:18:56.444 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for rt_stop_times 2024-11-13: 0:10:40.446083
2024-11-15 13:40:07.685 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-11-13: 0:02:36.767174
2024-11-15 13:43:45.362 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for speedmap_segments 2024-11-13: 0:03:37.215365
2024-11-25 14:44:38.184 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for stop_segments 2024-02-14: 0:12:12.259095
2024-11-25 14:54:04.369 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for stop_segments 2024-02-14: 0:09:24.152632
2024-11-25 15:29:52.547 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for rt_stop_times 2024-02-14: 0:11:56.560691
2024-11-25 15:39:43.982 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for rt_stop_times 2024-02-14: 0:09:49.647278
2024-11-25 15:59:02.856 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-02-14: 0:02:13.301548
2024-11-25 16:02:17.354 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for speedmap_segments 2024-02-14: 0:03:14.096258
2024-11-25 17:04:55.099 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for stop_segments 2024-04-17: 0:12:31.879883
2024-11-25 17:15:06.934 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for stop_segments 2024-04-17: 0:10:09.477922
2024-11-25 17:53:10.041 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for rt_stop_times 2024-04-17: 0:12:06.825727
2024-11-25 18:03:42.892 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for rt_stop_times 2024-04-17: 0:10:30.863101
2024-11-25 18:23:48.992 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-04-17: 0:02:18.365295
2024-11-25 18:27:24.420 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for speedmap_segments 2024-04-17: 0:03:35.000398
2024-11-25 19:47:37.984 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for stop_segments 2024-07-17: 0:12:03.630595
2024-11-25 19:57:06.324 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for stop_segments 2024-07-17: 0:09:26.376710
2024-11-25 20:32:12.883 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for rt_stop_times 2024-07-17: 0:11:20.451706
2024-11-25 20:41:58.771 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for rt_stop_times 2024-07-17: 0:09:44.129455
2024-11-25 21:00:54.275 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-07-17: 0:02:16.592519
2024-11-25 21:04:16.776 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for speedmap_segments 2024-07-17: 0:03:22.103136
Loading

0 comments on commit c1742ea

Please sign in to comment.