Skip to content

Commit

Permalink
debug metro J line: can't group on route_short_name since it's not required per spec and can be nan. Instead, add it in when joining to scheduled frequencies
Browse files Browse the repository at this point in the history
  • Loading branch information
edasmalchi committed Nov 4, 2024
1 parent ac4c409 commit a1848ae
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 16 deletions.
2 changes: 1 addition & 1 deletion _shared_utils/shared_utils/gtfs_analytics_data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ speedmap_segments:
trip_stop_cols: ["trip_instance_key", "stop_sequence", "stop_sequence1"]
shape_stop_cols: ["shape_array_key", "shape_id"]
stop_pair_cols: ["stop_pair", "stop_pair_name", "segment_id"]
route_dir_cols: ["route_id", "route_short_name"]
route_dir_cols: ["route_id", "direction_id"]
segments_file: "segment_options/speedmap_segments"
shape_stop_single_segment: "rollup_singleday/speeds_shape_speedmap_segments"
shape_stop_single_segment_detail: "rollup_singleday/speeds_shape_speedmap_segments_detail"
Expand Down
52 changes: 38 additions & 14 deletions rt_segment_speeds/40_speedmap_scratchpad.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,27 @@
"df >> filter(_.route_id.str.contains('910'))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "a602c285-26f8-4d1f-8412-dd0437cb7543",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'dir': '${gcs_paths.SEGMENT_GCS}', 'stage1': '${speeds_tables.vp_dwell}', 'proxy_stop_times': 'stop_time_expansion/speedmap_stop_times', 'stage2': 'nearest/nearest_vp_speedmap_proxy', 'stage2b': 'nearest/nearest2_vp_speedmap_proxy', 'stage3': 'speedmap/stop_arrivals_proxy', 'stage3b': 'speedmap/stop_arrivals', 'stage4': 'speedmap/speeds', 'trip_stop_cols': ['trip_instance_key', 'stop_sequence', 'stop_sequence1'], 'shape_stop_cols': ['shape_array_key', 'shape_id'], 'stop_pair_cols': ['stop_pair', 'stop_pair_name', 'segment_id'], 'route_dir_cols': ['route_id', 'route_short_name'], 'segments_file': 'segment_options/speedmap_segments', 'shape_stop_single_segment': 'rollup_singleday/speeds_shape_speedmap_segments', 'shape_stop_single_segment_detail': 'rollup_singleday/speeds_shape_speedmap_segments_detail', 'route_dir_single_segment': 'rollup_singleday/speeds_route_dir_speedmap_segments', 'route_dir_multi_segment': 'rollup_multiday/speeds_route_dir_speedmap_segments', 'min_trip_minutes': '${speed_vars.time_min_cutoff}', 'max_trip_minutes': 180, 'max_speed': '${speed_vars.max_speed}', 'min_meters_elapsed': 1609, 'segment_meters': 1000}"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog.speedmap_segments"
]
},
{
"cell_type": "code",
"execution_count": 37,
Expand All @@ -786,7 +807,17 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 41,
"id": "ad48874e-a6ea-4ee0-8e9d-7eb5f3f0a581",
"metadata": {},
"outputs": [],
"source": [
"path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.route_dir_single_segment}_{analysis_date}.parquet'"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "ef401846-f54c-4805-a7aa-53ed1b630837",
"metadata": {},
"outputs": [],
Expand All @@ -796,7 +827,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 43,
"id": "ffcec5ce-dcc2-402b-bef0-60f6bd25dd77",
"metadata": {},
"outputs": [
Expand All @@ -822,16 +853,14 @@
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>schedule_gtfs_dataset_key</th>\n",
" <th>shape_array_key</th>\n",
" <th>shape_id</th>\n",
" <th>route_id</th>\n",
" <th>route_short_name</th>\n",
" <th>stop_pair</th>\n",
" <th>stop_pair_name</th>\n",
" <th>segment_id</th>\n",
" <th>time_of_day</th>\n",
" <th>time_period</th>\n",
" <th>p50_mph</th>\n",
" <th>...</th>\n",
" <th>n_trips</th>\n",
" <th>p20_mph</th>\n",
" <th>p80_mph</th>\n",
" <th>name</th>\n",
Expand All @@ -840,25 +869,20 @@
" <th>organization_name</th>\n",
" <th>base64_url</th>\n",
" <th>geometry</th>\n",
" <th>n_trips_sch</th>\n",
" <th>trips_hr_sch</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty GeoDataFrame\n",
"Columns: [schedule_gtfs_dataset_key, shape_array_key, shape_id, route_id, route_short_name, stop_pair, stop_pair_name, segment_id, time_of_day, p50_mph, n_trips, p20_mph, p80_mph, name, caltrans_district, organization_source_record_id, organization_name, base64_url, geometry, n_trips_sch, trips_hr_sch]\n",
"Index: []\n",
"\n",
"[0 rows x 21 columns]"
"Columns: [schedule_gtfs_dataset_key, route_id, route_short_name, stop_pair, stop_pair_name, segment_id, time_period, p50_mph, n_trips, p20_mph, p80_mph, name, caltrans_district, organization_source_record_id, organization_name, base64_url, geometry]\n",
"Index: []"
]
},
"execution_count": 39,
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ def get_sched_trips_hr(analysis_date: str) -> pd.DataFrame:
frequency available. Currently only supports detailed time of day.
"""
keep_trip_cols = ['trip_instance_key', 'gtfs_dataset_key', 'route_id',
'shape_id']
'shape_id', 'route_short_name']
trips = helpers.import_scheduled_trips(analysis_date, columns=keep_trip_cols)
trips = trips.rename(
columns={'gtfs_dataset_key': 'schedule_gtfs_dataset_key'})
Expand Down

0 comments on commit a1848ae

Please sign in to comment.