diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index cc4bab0a0..db0b364d4 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -119,7 +119,7 @@ speedmap_segments: trip_stop_cols: ["trip_instance_key", "stop_sequence", "stop_sequence1"] shape_stop_cols: ["shape_array_key", "shape_id"] stop_pair_cols: ["stop_pair", "stop_pair_name", "segment_id"] - route_dir_cols: ["route_id", "route_short_name"] + route_dir_cols: ["route_id", "direction_id"] segments_file: "segment_options/speedmap_segments" shape_stop_single_segment: "rollup_singleday/speeds_shape_speedmap_segments" shape_stop_single_segment_detail: "rollup_singleday/speeds_shape_speedmap_segments_detail" diff --git a/rt_segment_speeds/40_speedmap_scratchpad.ipynb b/rt_segment_speeds/40_speedmap_scratchpad.ipynb index 2ceb1a11b..a2f19bc9a 100644 --- a/rt_segment_speeds/40_speedmap_scratchpad.ipynb +++ b/rt_segment_speeds/40_speedmap_scratchpad.ipynb @@ -774,6 +774,27 @@ "df >> filter(_.route_id.str.contains('910'))" ] }, + { + "cell_type": "code", + "execution_count": 40, + "id": "a602c285-26f8-4d1f-8412-dd0437cb7543", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dir': '${gcs_paths.SEGMENT_GCS}', 'stage1': '${speeds_tables.vp_dwell}', 'proxy_stop_times': 'stop_time_expansion/speedmap_stop_times', 'stage2': 'nearest/nearest_vp_speedmap_proxy', 'stage2b': 'nearest/nearest2_vp_speedmap_proxy', 'stage3': 'speedmap/stop_arrivals_proxy', 'stage3b': 'speedmap/stop_arrivals', 'stage4': 'speedmap/speeds', 'trip_stop_cols': ['trip_instance_key', 'stop_sequence', 'stop_sequence1'], 'shape_stop_cols': ['shape_array_key', 'shape_id'], 'stop_pair_cols': ['stop_pair', 'stop_pair_name', 'segment_id'], 'route_dir_cols': ['route_id', 'route_short_name'], 'segments_file': 'segment_options/speedmap_segments', 'shape_stop_single_segment': 'rollup_singleday/speeds_shape_speedmap_segments', 'shape_stop_single_segment_detail': 'rollup_singleday/speeds_shape_speedmap_segments_detail', 'route_dir_single_segment': 'rollup_singleday/speeds_route_dir_speedmap_segments', 'route_dir_multi_segment': 'rollup_multiday/speeds_route_dir_speedmap_segments', 'min_trip_minutes': '${speed_vars.time_min_cutoff}', 'max_trip_minutes': 180, 'max_speed': '${speed_vars.max_speed}', 'min_meters_elapsed': 1609, 'segment_meters': 1000}" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "catalog.speedmap_segments" + ] + }, { "cell_type": "code", "execution_count": 37, @@ -786,7 +807,17 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 41, + "id": "ad48874e-a6ea-4ee0-8e9d-7eb5f3f0a581", + "metadata": {}, + "outputs": [], + "source": [ + "path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.route_dir_single_segment}_{analysis_date}.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": 42, "id": "ef401846-f54c-4805-a7aa-53ed1b630837", "metadata": {}, "outputs": [], @@ -796,7 +827,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 43, "id": "ffcec5ce-dcc2-402b-bef0-60f6bd25dd77", "metadata": {}, "outputs": [ @@ -822,16 +853,14 @@ " \n", " \n", " schedule_gtfs_dataset_key\n", - " shape_array_key\n", - " shape_id\n", " route_id\n", " route_short_name\n", " stop_pair\n", " stop_pair_name\n", " segment_id\n", - " time_of_day\n", + " time_period\n", " p50_mph\n", - " ...\n", + " n_trips\n", " p20_mph\n", " p80_mph\n", " name\n", @@ -840,25 +869,20 @@ " organization_name\n", " base64_url\n", " geometry\n", - " n_trips_sch\n", - " trips_hr_sch\n", " \n", " \n", " \n", " \n", "\n", - "

0 rows × 21 columns

\n", "" ], "text/plain": [ "Empty GeoDataFrame\n", - "Columns: [schedule_gtfs_dataset_key, shape_array_key, shape_id, route_id, route_short_name, stop_pair, stop_pair_name, segment_id, time_of_day, p50_mph, n_trips, p20_mph, p80_mph, name, caltrans_district, organization_source_record_id, organization_name, base64_url, geometry, n_trips_sch, trips_hr_sch]\n", - "Index: []\n", - "\n", - "[0 rows x 21 columns]" + "Columns: [schedule_gtfs_dataset_key, route_id, route_short_name, stop_pair, stop_pair_name, segment_id, time_period, p50_mph, n_trips, p20_mph, p80_mph, name, caltrans_district, organization_source_record_id, organization_name, base64_url, geometry]\n", + "Index: []" ] }, - "execution_count": 39, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } diff --git a/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py b/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py index 3ddef7107..aa77b5930 100644 --- a/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py +++ b/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py @@ -521,7 +521,7 @@ def get_sched_trips_hr(analysis_date: str) -> pd.DataFrame: frequency available. Currently only supports detailed time of day. """ keep_trip_cols = ['trip_instance_key', 'gtfs_dataset_key', 'route_id', - 'shape_id'] + 'shape_id', 'route_short_name'] trips = helpers.import_scheduled_trips(analysis_date, columns=keep_trip_cols) trips = trips.rename( columns={'gtfs_dataset_key': 'schedule_gtfs_dataset_key'})