diff --git a/gtfs_digest/41_explore_ct_district.ipynb b/gtfs_digest/41_explore_ct_district.ipynb deleted file mode 100644 index 54306cdc3..000000000 --- a/gtfs_digest/41_explore_ct_district.ipynb +++ /dev/null @@ -1,379 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "5137b49c-d292-4545-9bc6-0521c797e332", - "metadata": {}, - "source": [ - "## Filter mutliple feeds\n", - "Evan: I'm also seeing multiple feeds in the District Digest Map. I don't mind them, but it may be helpful to try to filter for just Public Currently Operating Fixed Route or Regional Subfeed." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "f80b95ac-4f34-4c31-b392-b943c8a08815", - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import merge_data\n", - "import numpy as np\n", - "import pandas as pd\n", - "from segment_speed_utils import gtfs_schedule_wrangling, helpers, time_series_utils\n", - "from shared_utils import catalog_utils, rt_dates, rt_utils\n", - "from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "fb63b9e4-f3e3-4a44-bc4f-005f705029c1", - "metadata": {}, - "outputs": [], - "source": [ - "pd.options.display.max_columns = 100\n", - "pd.options.display.float_format = \"{:.2f}\".format\n", - "pd.set_option(\"display.max_rows\", None)\n", - "pd.set_option(\"display.max_colwidth\", None)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "8e88db14-e0a5-48e8-a9df-10fc5b027ad4", - "metadata": {}, - "outputs": [], - "source": [ - "district = \"08 - San Bernardino\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "cc5657c7-9147-4683-b3be-c59fe95db817", - "metadata": {}, - "outputs": [], - "source": [ - "OPERATOR_FILE = GTFS_DATA_DICT.digest_tables.operator_profiles\n", - "OPERATOR_ROUTE = GTFS_DATA_DICT.digest_tables.operator_routes_map" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "435d1028-222a-4c55-99ca-cb19972ce39b", - "metadata": {}, - "outputs": [], - "source": [ - "operator_df = pd.read_parquet(\n", - " f\"{RT_SCHED_GCS}{OPERATOR_FILE}.parquet\",\n", - " filters = [[(\"caltrans_district\", \"==\", district)]]\n", - ")\n", - "\n", - "# using name instead of schedule_gtfs_dataset_key allows us to get \n", - "# the last ones for LA Metro without keeping extraneous rows for LA Metro when keys changed\n", - "operator_df = operator_df.sort_values(\n", - " [\"service_date\", \"name\"], \n", - " ascending=[False, True]\n", - ").drop_duplicates(\n", - " subset=[\"name\"]\n", - ").reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "2b4f113c-c110-4b5d-a0b7-3d992aa6b4e9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['2024-11-13T00:00:00.000000000', '2024-10-16T00:00:00.000000000'],\n", - " dtype='datetime64[ns]')" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "operator_df.service_date.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "9ae9c383-a8ab-4916-a524-d108f927ee2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['schedule_gtfs_dataset_key', 'vp_per_min_agency',\n", - " 'spatial_accuracy_agency', 'service_date', 'operator_n_routes',\n", - " 'operator_n_trips', 'operator_n_shapes', 'operator_n_stops',\n", - " 'operator_n_arrivals', 'operator_route_length_miles',\n", - " 'operator_arrivals_per_stop', 'n_downtown_local_routes',\n", - " 'n_local_routes', 'n_coverage_routes', 'n_rapid_routes',\n", - " 'n_express_routes', 'n_rail_routes', 'name',\n", - " 'organization_source_record_id', 'organization_name',\n", - " 'caltrans_district', 'counties_served', 'service_area_sq_miles',\n", - " 'hq_city', 'uza_name', 'service_area_pop', 'organization_type',\n", - " 'primary_uza', 'reporter_type'],\n", - " dtype='object')" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "operator_df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "bd29fe28-0b63-490b-ae1c-6e0a64ca3a37", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(17, 29)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "operator_df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "d5115797-db04-49ae-841a-4c573349dea5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameorganization_nameservice_date
0Banning Pass ScheduleCity of Banning2024-11-13
1Basin Transit GMV ScheduleBasin Transit2024-11-13
2Beaumont Transit ScheduleCity of Beaumont2024-11-13
3Corona ScheduleCity of Corona2024-11-13
4Desert Roadrunner GMV SchedulePalo Verde Valley Transit Agency2024-11-13
5Desert Roadrunner SchedulePalo Verde Valley Transit Agency2024-11-13
6Morongo Basin ScheduleBasin Transit2024-11-13
7Mountain Transit GMV ScheduleMountain Area Regional Transit Authority2024-11-13
8Mountain Transit ScheduleMountain Area Regional Transit Authority2024-11-13
9Needles ScheduleCity of Needles2024-11-13
10OmniTrans ScheduleOmniTrans2024-11-13
11Riverside ScheduleRiverside Transit Agency2024-11-13
12Victor Valley GMV ScheduleVictor Valley Transit Authority2024-11-13
13Victor Valley ScheduleVictor Valley Transit Authority2024-11-13
14NoneNone2024-11-13
15Beaumont Pass ScheduleCity of Beaumont2024-10-16
16Flixbus ScheduleGreyhound2024-10-16
\n", - "
" - ], - "text/plain": [ - " name organization_name \\\n", - "0 Banning Pass Schedule City of Banning \n", - "1 Basin Transit GMV Schedule Basin Transit \n", - "2 Beaumont Transit Schedule City of Beaumont \n", - "3 Corona Schedule City of Corona \n", - "4 Desert Roadrunner GMV Schedule Palo Verde Valley Transit Agency \n", - "5 Desert Roadrunner Schedule Palo Verde Valley Transit Agency \n", - "6 Morongo Basin Schedule Basin Transit \n", - "7 Mountain Transit GMV Schedule Mountain Area Regional Transit Authority \n", - "8 Mountain Transit Schedule Mountain Area Regional Transit Authority \n", - "9 Needles Schedule City of Needles \n", - "10 OmniTrans Schedule OmniTrans \n", - "11 Riverside Schedule Riverside Transit Agency \n", - "12 Victor Valley GMV Schedule Victor Valley Transit Authority \n", - "13 Victor Valley Schedule Victor Valley Transit Authority \n", - "14 None None \n", - "15 Beaumont Pass Schedule City of Beaumont \n", - "16 Flixbus Schedule Greyhound \n", - "\n", - " service_date \n", - "0 2024-11-13 \n", - "1 2024-11-13 \n", - "2 2024-11-13 \n", - "3 2024-11-13 \n", - "4 2024-11-13 \n", - "5 2024-11-13 \n", - "6 2024-11-13 \n", - "7 2024-11-13 \n", - "8 2024-11-13 \n", - "9 2024-11-13 \n", - "10 2024-11-13 \n", - "11 2024-11-13 \n", - "12 2024-11-13 \n", - "13 2024-11-13 \n", - "14 2024-11-13 \n", - "15 2024-10-16 \n", - "16 2024-10-16 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "operator_df[[\"name\",\"organization_name\", \"service_date\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "25110722-5b98-4b95-88d9-1454c220069b", - "metadata": {}, - "outputs": [], - "source": [ - "operators_in_district = operator_df.schedule_gtfs_dataset_key.unique()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/gtfs_digest/40_transit_bunching.ipynb b/gtfs_digest/41_transit_bunching.ipynb similarity index 92% rename from gtfs_digest/40_transit_bunching.ipynb rename to gtfs_digest/41_transit_bunching.ipynb index a17527da3..0ba015af3 100644 --- a/gtfs_digest/40_transit_bunching.ipynb +++ b/gtfs_digest/41_transit_bunching.ipynb @@ -192,80 +192,6 @@ "len(route_dir)" ] }, - { - "cell_type": "code", - "execution_count": 12, - "id": "25fc88ec-4c14-46e1-9c92-6fd4bc02c70f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
schedule_gtfs_dataset_keyroute_iddirection_idtime_periodroute_primary_directionfrequencyservice_dateheadway_minutes
2253cc53a0dbf5df90e3009b9cb5d89d80ba301.00peakEastbound1.002024-05-2260.00
\n", - "
" - ], - "text/plain": [ - " schedule_gtfs_dataset_key route_id direction_id time_period \\\n", - "2253 cc53a0dbf5df90e3009b9cb5d89d80ba 30 1.00 peak \n", - "\n", - " route_primary_direction frequency service_date headway_minutes \n", - "2253 Eastbound 1.00 2024-05-22 60.00 " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "route_dir.loc[\n", - " (route_dir.schedule_gtfs_dataset_key == \"cc53a0dbf5df90e3009b9cb5d89d80ba\")\n", - " & (route_dir.route_id == \"30\")\n", - " & (route_dir.direction_id == 1)\n", - "]" - ] - }, { "cell_type": "markdown", "id": "84741559-46a2-4a62-a6e2-8843771aea1f", @@ -276,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "d35238bb-2418-466b-8814-96382abeb3eb", "metadata": {}, "outputs": [], @@ -287,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "6272b8aa-0d76-4e18-bc76-66548f54c9a0", "metadata": {}, "outputs": [], @@ -302,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "d5d62df4-cffb-4bc8-9ad1-9766a8ec2bf1", "metadata": {}, "outputs": [], @@ -318,7 +244,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "4666bf1d-6456-49d6-955a-5ab77556af15", "metadata": {}, "outputs": [ @@ -381,7 +307,7 @@ "1 Corona Schedule 08 - San Bernardino 2024-05-22 " ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -392,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "d9d562b4-7ce1-4ea5-bdba-e8e0abbc3815", "metadata": {}, "outputs": [], @@ -407,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "3e80b935-ecb4-44cd-9174-72c0d4568e14", "metadata": {}, "outputs": [], @@ -446,7 +372,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 19, "id": "a81fd393-0e6b-4ec0-8911-1a79ab53d3ba", "metadata": {}, "outputs": [ @@ -514,7 +440,7 @@ "3426 City of Los Angeles LA DOT Schedule 07 - Los Angeles " ] }, - "execution_count": 89, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -529,7 +455,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 20, "id": "62e66d21-2227-41ca-be01-d44c62ac7b14", "metadata": {}, "outputs": [ @@ -597,7 +523,7 @@ "3427 City of Los Angeles LA DOT Schedule 07 - Los Angeles " ] }, - "execution_count": 90, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -620,7 +546,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "674f8ee6-3f6d-4f90-90ba-7ea9ee688b28", "metadata": {}, "outputs": [], @@ -630,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "5333afa8-4849-4864-b253-b03a1093e84f", "metadata": {}, "outputs": [], @@ -640,7 +566,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "111e2c27-337d-4440-a1f6-10ec582a6f9e", "metadata": {}, "outputs": [], @@ -659,7 +585,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "id": "e1aed68a-0ed2-4da4-904a-cb91681e7f38", "metadata": {}, "outputs": [], @@ -671,7 +597,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "id": "04901f96-76d5-4d99-b3d1-f174ef989357", "metadata": {}, "outputs": [], @@ -686,7 +612,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "id": "9c1ba23b-30df-4916-a522-eb70bd5afdb9", "metadata": {}, "outputs": [ @@ -696,7 +622,7 @@ "1303" ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -715,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "id": "d9688e03-4b61-4736-b9d5-3539b0de80b2", "metadata": {}, "outputs": [], @@ -740,7 +666,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "id": "8dac05a6-0ba9-472b-85a2-5a0081550efb", "metadata": {}, "outputs": [], @@ -750,7 +676,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "id": "cd13aa3e-f222-49f8-b923-1e9e901f7bfb", "metadata": {}, "outputs": [], @@ -763,7 +689,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "id": "ae72b7fc-ec7b-4dcd-8553-ac2abce5da1d", "metadata": {}, "outputs": [], @@ -773,39 +699,6 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": 31, - "id": "f1adaa08-80da-455b-a755-34d959e79c25", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 12.00\n", - "mean 60.00\n", - "std 0.00\n", - "min 60.00\n", - "25% 60.00\n", - "50% 60.00\n", - "75% 60.00\n", - "max 60.00\n", - "Name: headway_minutes, dtype: float64" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "trips_routes.loc[\n", - " (trips_routes.schedule_gtfs_dataset_key == \"cc53a0dbf5df90e3009b9cb5d89d80ba\")\n", - " & (trips_routes.route_id == \"30\")\n", - " & (trips_routes.direction_id == 1)\n", - "].headway_minutes.describe()" - ] - }, { "cell_type": "markdown", "id": "e3be8778-84ff-479f-a3b2-178e374da5f2", @@ -816,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "id": "3f6727c6-205f-4cc8-8a68-42e8dec6e4b3", "metadata": {}, "outputs": [], @@ -828,7 +721,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "id": "0922c60c-c46c-4240-bce2-7be30034c2f8", "metadata": {}, "outputs": [ @@ -915,7 +808,7 @@ "0 07 - Los Angeles Bus " ] }, - "execution_count": 33, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -926,7 +819,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 33, "id": "2633ffab-5c8d-479f-805a-3994d0231c38", "metadata": {}, "outputs": [ @@ -936,7 +829,7 @@ "(2601262, 7)" ] }, - "execution_count": 34, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -947,7 +840,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 34, "id": "049a2833-f132-431a-8f44-92f31cd11d8a", "metadata": {}, "outputs": [], @@ -965,7 +858,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 35, "id": "7c59d770-d379-422e-a23d-9140c23df375", "metadata": {}, "outputs": [ @@ -975,7 +868,7 @@ "15029" ] }, - "execution_count": 36, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -994,7 +887,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 36, "id": "5713f8ff-2d01-4cb7-a7c3-2af8d09ccaed", "metadata": {}, "outputs": [ @@ -1004,7 +897,7 @@ "3672925" ] }, - "execution_count": 37, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1015,7 +908,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 37, "id": "fdd38b5b-a8ab-4959-9ace-c4bdfd32bd8a", "metadata": {}, "outputs": [], @@ -1025,7 +918,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 38, "id": "1904ff1e-99e8-49f7-9a3f-fcf4d44961dd", "metadata": {}, "outputs": [ @@ -1035,7 +928,7 @@ "3061772" ] }, - "execution_count": 39, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1044,42 +937,6 @@ "len(trips_routes_times2)" ] }, - { - "cell_type": "code", - "execution_count": 41, - "id": "e44aadb1-c054-48c3-9613-00ead9d9a655", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 476.00\n", - "mean 60.00\n", - "std 0.00\n", - "min 60.00\n", - "25% 60.00\n", - "50% 60.00\n", - "75% 60.00\n", - "max 60.00\n", - "Name: headway_minutes, dtype: float64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "trips_routes_times2.loc[\n", - " (\n", - " trips_routes_times2.schedule_gtfs_dataset_key\n", - " == \"cc53a0dbf5df90e3009b9cb5d89d80ba\"\n", - " )\n", - " & (trips_routes_times2.route_id == \"30\")\n", - " & (trips_routes_times2.direction_id == 1)\n", - "].headway_minutes.describe()" - ] - }, { "cell_type": "markdown", "id": "2d09574e-464e-4f31-8f53-0596911dcabe", @@ -1090,7 +947,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 39, "id": "fa4001cf-6763-42cf-b726-e3ca57915615", "metadata": {}, "outputs": [], @@ -1116,7 +973,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 40, "id": "2cb2194a-91f5-43af-a534-68caf01e9dca", "metadata": {}, "outputs": [], @@ -1126,7 +983,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 41, "id": "0ef36e93-79ed-4f86-b16a-9d28d90aea1a", "metadata": {}, "outputs": [], @@ -1144,42 +1001,6 @@ ").reset_index(drop=True)" ] }, - { - "cell_type": "code", - "execution_count": 45, - "id": "623769ea-22e5-4e0c-a7df-72c14b6b3a2b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 476.00\n", - "mean 60.00\n", - "std 0.00\n", - "min 60.00\n", - "25% 60.00\n", - "50% 60.00\n", - "75% 60.00\n", - "max 60.00\n", - "Name: headway_minutes, dtype: float64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "trips_routes_times4.loc[\n", - " (\n", - " trips_routes_times4.schedule_gtfs_dataset_key\n", - " == \"cc53a0dbf5df90e3009b9cb5d89d80ba\"\n", - " )\n", - " & (trips_routes_times4.route_id == \"30\")\n", - " & (trips_routes_times4.direction_id == 1)\n", - "].headway_minutes.describe()" - ] - }, { "cell_type": "markdown", "id": "48737ec7-31be-4743-97ca-c6c000670a13", @@ -1192,7 +1013,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 42, "id": "3ce579d1-0392-41a6-b535-9f4422a78216", "metadata": {}, "outputs": [], @@ -1204,7 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 43, "id": "1d3e4966-1f57-4117-9f73-506055a42d7b", "metadata": {}, "outputs": [], @@ -1224,7 +1045,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 44, "id": "003abe45-5d55-4839-80f8-83e693214427", "metadata": {}, "outputs": [], @@ -1234,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 45, "id": "c2ab63be-be6b-4f2c-aaaf-b6733f433e10", "metadata": {}, "outputs": [], @@ -1247,7 +1068,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 46, "id": "a16335dc-47a0-4ea1-9612-fc6d5dfea1d6", "metadata": {}, "outputs": [ @@ -1279,7 +1100,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 47, "id": "8f0e5d8a-5e26-4c75-b593-2cc9aa275a35", "metadata": {}, "outputs": [], @@ -1293,7 +1114,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 48, "id": "041138a9-5275-43d6-8aa1-014c732dcc06", "metadata": {}, "outputs": [], @@ -1307,7 +1128,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 49, "id": "84a4772d-653b-49dd-8db2-33e67f3c0708", "metadata": {}, "outputs": [], @@ -1321,7 +1142,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 50, "id": "443fea09-7fd2-4d18-8754-98022eae1975", "metadata": {}, "outputs": [], @@ -1335,7 +1156,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 51, "id": "2c07b22a-658a-4d4b-9760-d517ebbe349f", "metadata": {}, "outputs": [], @@ -1349,7 +1170,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 52, "id": "202e4ea2-d045-4b79-a7f3-85250f73b452", "metadata": {}, "outputs": [ @@ -1381,19 +1202,19 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 53, "id": "03b5b1b7-b4d4-4ee6-9037-885cf92cce2b", "metadata": {}, "outputs": [], "source": [ "extreme_values = trips_routes_times4.loc[\n", - " (trips_routes_times4.delay_min < -5) | (trips_routes_times4.delay_min > 20.50)\n", + " (trips_routes_times4.delay_min < -5.18) | (trips_routes_times4.delay_min > 20.50)\n", "]" ] }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 54, "id": "32d36ca4-04ec-4c20-8bda-ee151a897b90", "metadata": {}, "outputs": [ @@ -1401,19 +1222,19 @@ "name": "stdout", "output_type": "stream", "text": [ - "count 63545.00\n", - "mean 36.35\n", - "std 251.35\n", + "count 60949.00\n", + "mean 38.11\n", + "std 256.50\n", "min -2279.98\n", - "1% -120.17\n", - "2% -54.87\n", - "5% -24.75\n", - "10% -13.52\n", - "50% -5.08\n", - "90% 42.63\n", - "95% 71.62\n", - "98% 1442.29\n", - "99% 1450.57\n", + "1% -126.55\n", + "2% -55.63\n", + "5% -25.72\n", + "10% -13.95\n", + "50% -5.18\n", + "90% 43.75\n", + "95% 74.00\n", + "98% 1443.00\n", + "99% 1450.90\n", "max 2277.53\n", "Name: delay_min, dtype: float64\n" ] @@ -1425,7 +1246,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 55, "id": "19ed3ea7-e9c6-435f-a88c-e570d0928584", "metadata": {}, "outputs": [ @@ -1472,67 +1293,67 @@ " \n", " \n", " \n", - " 645805\n", + " 989050\n", " 2024-05-22\n", - " 07 - Los Angeles\n", - " 0666caf3ec1ecc96b74f4477ee4bc939\n", - " 608992664173210532aa3e6cc573be2f\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " Metro Local Line\n", + " 05 - San Luis Obispo\n", + " 239f3baf3dd3b9e9464f66a777f9897d\n", + " a3c82f955ca3b93746786deb7fe4fb0d\n", + " Santa Barbara Metropolitan Transit District\n", + " Ellwood\n", " Bus\n", - " 602-13172\n", + " 25\n", " 1.00\n", - " 16293\n", - " 17\n", - " f795b1d137cadd84b9f8f642c9535d60\n", - " 64780\n", - " 60480.00\n", - " 37.04\n", - " 2024-05-22 17:59:40\n", - " 2024-05-22 16:48:00\n", - " 71.67\n", + " 30\n", + " 2\n", + " c4a350efc3387d2cc5c1d556bf76d74b\n", + " 56953\n", + " 28932.00\n", + " 43.48\n", + " 2024-05-22 15:49:13\n", + " 2024-05-22 08:02:12\n", + " 467.02\n", " \n", " \n", - " 2296908\n", + " 1712446\n", " 2024-05-22\n", - " 07 - Los Angeles\n", - " cc53a0dbf5df90e3009b9cb5d89d80ba\n", - " 2cfdf0e33e9229d6b0ad124d956f5856\n", - " City of Los Angeles\n", - " DASH A\n", + " 11 - San Diego\n", + " baeeb157e85a901e47b828ef9fe75091\n", + " db8c6e0cf5ece2a8cdb5bdc71d049bd1\n", + " San Diego International Airport\n", + " Old Town - UTC via Pacific Beach\n", " Bus\n", - " 4447\n", + " 30\n", " 0.00\n", - " 4896850\n", - " 24\n", - " 75441ab9a58f6d0b5458650496eec50b\n", - " 63696\n", - " 59220.00\n", - " 7.06\n", - " 2024-05-22 17:41:36\n", - " 2024-05-22 16:27:00\n", - " 74.60\n", + " 12270\n", + " 10\n", + " 0ef61dffae42c56b1873ea6bf8dac916\n", + " 9616\n", + " 86400.00\n", + " 15.46\n", + " 2024-05-22 02:40:16\n", + " 2024-05-22 00:00:00\n", + " 160.27\n", " \n", " \n", - " 988951\n", + " 945593\n", " 2024-05-22\n", - " 05 - San Luis Obispo\n", - " 239f3baf3dd3b9e9464f66a777f9897d\n", - " a3c82f955ca3b93746786deb7fe4fb0d\n", - " Santa Barbara Metropolitan Transit District\n", - " East Santa Barbara\n", + " 07 - Los Angeles\n", + " 1770249a5a2e770ca90628434d4934b1\n", + " 926867fdee73d5fbfe4f011871bcd830\n", + " City of Thousand Oaks\n", + " Route 10\n", " Bus\n", - " 2\n", - " 1.00\n", - " 119\n", - " 8\n", - " 724f34a370c7f84045fea95cd9232712\n", - " 51594\n", - " 24501.00\n", - " 16.00\n", - " 2024-05-22 14:19:54\n", - " 2024-05-22 06:48:21\n", - " 451.55\n", + " 4778\n", + " 0.00\n", + " 3737147\n", + " 18\n", + " 0602dd78ba40b3981325457f8063a82e\n", + " 57236\n", + " 48720.00\n", + " 60.00\n", + " 2024-05-22 15:53:56\n", + " 2024-05-22 13:32:00\n", + " 141.93\n", " \n", " \n", "\n", @@ -1540,42 +1361,42 @@ ], "text/plain": [ " service_date caltrans_district schedule_gtfs_dataset_key \\\n", - "645805 2024-05-22 07 - Los Angeles 0666caf3ec1ecc96b74f4477ee4bc939 \n", - "2296908 2024-05-22 07 - Los Angeles cc53a0dbf5df90e3009b9cb5d89d80ba \n", - "988951 2024-05-22 05 - San Luis Obispo 239f3baf3dd3b9e9464f66a777f9897d \n", + "989050 2024-05-22 05 - San Luis Obispo 239f3baf3dd3b9e9464f66a777f9897d \n", + "1712446 2024-05-22 11 - San Diego baeeb157e85a901e47b828ef9fe75091 \n", + "945593 2024-05-22 07 - Los Angeles 1770249a5a2e770ca90628434d4934b1 \n", "\n", " feed_key \\\n", - "645805 608992664173210532aa3e6cc573be2f \n", - "2296908 2cfdf0e33e9229d6b0ad124d956f5856 \n", - "988951 a3c82f955ca3b93746786deb7fe4fb0d \n", + "989050 a3c82f955ca3b93746786deb7fe4fb0d \n", + "1712446 db8c6e0cf5ece2a8cdb5bdc71d049bd1 \n", + "945593 926867fdee73d5fbfe4f011871bcd830 \n", "\n", - " organization_name \\\n", - "645805 Los Angeles County Metropolitan Transportation Authority \n", - "2296908 City of Los Angeles \n", - "988951 Santa Barbara Metropolitan Transit District \n", + " organization_name \\\n", + "989050 Santa Barbara Metropolitan Transit District \n", + "1712446 San Diego International Airport \n", + "945593 City of Thousand Oaks \n", "\n", - " route_long_name route_type route_id direction_id stop_id \\\n", - "645805 Metro Local Line Bus 602-13172 1.00 16293 \n", - "2296908 DASH A Bus 4447 0.00 4896850 \n", - "988951 East Santa Barbara Bus 2 1.00 119 \n", + " route_long_name route_type route_id direction_id \\\n", + "989050 Ellwood Bus 25 1.00 \n", + "1712446 Old Town - UTC via Pacific Beach Bus 30 0.00 \n", + "945593 Route 10 Bus 4778 0.00 \n", "\n", - " stop_sequence trip_instance_key rt_arrival_sec \\\n", - "645805 17 f795b1d137cadd84b9f8f642c9535d60 64780 \n", - "2296908 24 75441ab9a58f6d0b5458650496eec50b 63696 \n", - "988951 8 724f34a370c7f84045fea95cd9232712 51594 \n", + " stop_id stop_sequence trip_instance_key \\\n", + "989050 30 2 c4a350efc3387d2cc5c1d556bf76d74b \n", + "1712446 12270 10 0ef61dffae42c56b1873ea6bf8dac916 \n", + "945593 3737147 18 0602dd78ba40b3981325457f8063a82e \n", "\n", - " scheduled_arrival_sec headway_minutes converted_rt_arrival \\\n", - "645805 60480.00 37.04 2024-05-22 17:59:40 \n", - "2296908 59220.00 7.06 2024-05-22 17:41:36 \n", - "988951 24501.00 16.00 2024-05-22 14:19:54 \n", - "\n", - " converted_schd_arrival delay_min \n", - "645805 2024-05-22 16:48:00 71.67 \n", - "2296908 2024-05-22 16:27:00 74.60 \n", - "988951 2024-05-22 06:48:21 451.55 " + " rt_arrival_sec scheduled_arrival_sec headway_minutes \\\n", + "989050 56953 28932.00 43.48 \n", + "1712446 9616 86400.00 15.46 \n", + "945593 57236 48720.00 60.00 \n", + "\n", + " converted_rt_arrival converted_schd_arrival delay_min \n", + "989050 2024-05-22 15:49:13 2024-05-22 08:02:12 467.02 \n", + "1712446 2024-05-22 02:40:16 2024-05-22 00:00:00 160.27 \n", + "945593 2024-05-22 15:53:56 2024-05-22 13:32:00 141.93 " ] }, - "execution_count": 59, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -1584,42 +1405,6 @@ "extreme_values.loc[extreme_values.delay_min > 71].sample(3)" ] }, - { - "cell_type": "code", - "execution_count": 60, - "id": "c274d5b5-1e09-4523-be3c-82a459731538", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 476.00\n", - "mean 60.00\n", - "std 0.00\n", - "min 60.00\n", - "25% 60.00\n", - "50% 60.00\n", - "75% 60.00\n", - "max 60.00\n", - "Name: headway_minutes, dtype: float64" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "trips_routes_times4.loc[\n", - " (\n", - " trips_routes_times4.schedule_gtfs_dataset_key\n", - " == \"cc53a0dbf5df90e3009b9cb5d89d80ba\"\n", - " )\n", - " & (trips_routes_times4.route_id == \"30\")\n", - " & (trips_routes_times4.direction_id == 1)\n", - "].headway_minutes.describe()" - ] - }, { "cell_type": "markdown", "id": "e92c8e0f-ab5a-40d2-9a88-7dfc556cc87b", @@ -1630,7 +1415,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 56, "id": "a01ffb02-256d-44bd-997b-0362fdc455c4", "metadata": {}, "outputs": [], @@ -1659,7 +1444,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 57, "id": "b9171520-7358-4f22-9d9c-c5156e710f1b", "metadata": {}, "outputs": [], @@ -1680,7 +1465,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 58, "id": "1200d9f1-46a2-46d8-8187-8d05273a6c8d", "metadata": {}, "outputs": [], @@ -1695,7 +1480,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 59, "id": "4ee3528d-e87b-45ac-acbd-cc2536a1a817", "metadata": {}, "outputs": [ @@ -1713,7 +1498,7 @@ "Name: headway_minutes, dtype: float64" ] }, - "execution_count": 64, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } @@ -1739,7 +1524,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 60, "id": "71dbdd85-975c-4b7a-b847-60e274d97c6a", "metadata": {}, "outputs": [], @@ -1752,7 +1537,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "id": "6d20db15-76e6-4a58-a6c6-9e8e2d231fcc", "metadata": {}, "outputs": [], @@ -1772,7 +1557,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 62, "id": "dfc37f55-9bcd-417f-959b-21aeb0f7506a", "metadata": { "scrolled": true, @@ -1785,7 +1570,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 63, "id": "ab51dff9-55e7-4b5d-8b9f-72143f4370d9", "metadata": {}, "outputs": [ @@ -1829,7 +1614,7 @@ "1132081 55a01ef72af21906934ae8ffb4786e86 500.00" ] }, - "execution_count": 99, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -1844,7 +1629,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 64, "id": "2f17ba52-cfd5-4edb-a116-c31fd80b314d", "metadata": {}, "outputs": [ @@ -1915,7 +1700,7 @@ "1380 04 - Oakland " ] }, - "execution_count": 100, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -1930,7 +1715,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 65, "id": "6c61cdbb-daeb-40d1-9f2c-0640f2244ee7", "metadata": {}, "outputs": [ @@ -2001,7 +1786,7 @@ "1379 04 - Oakland " ] }, - "execution_count": 101, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -2016,7 +1801,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 66, "id": "2630f495-1f9e-4db5-a3c2-782d7cddb58b", "metadata": {}, "outputs": [ @@ -2058,7 +1843,7 @@ "1132018 60.00" ] }, - "execution_count": 97, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } @@ -2073,7 +1858,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 67, "id": "172b01a9-d224-4092-b65c-06d5b868de69", "metadata": {}, "outputs": [ @@ -2115,7 +1900,7 @@ "2279235 157.89" ] }, - "execution_count": 92, + "execution_count": 67, "metadata": {}, "output_type": "execute_result" } @@ -2128,6 +1913,272 @@ "][[\"headway_minutes\"]].drop_duplicates()" ] }, + { + "cell_type": "code", + "execution_count": 68, + "id": "f8fa5a3f-e1b7-49bd-bd1d-ee56f50f8785", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stop_idstop_sequencert_arrival_secscheduled_arrival_secheadway_minutesconverted_rt_arrivalactual_arrival_lag_min
22796971457172436817165400.0060.002024-05-22 18:56:11NaN
22796981457172437065070020.0060.002024-05-22 19:37:3041.32
22796991457172437170271460.0060.002024-05-22 19:55:0217.53
227970041401225038750400.0060.002024-05-22 13:59:47NaN
227970141401225218151600.0060.002024-05-22 14:29:4129.90
\n", + "
" + ], + "text/plain": [ + " stop_id stop_sequence rt_arrival_sec scheduled_arrival_sec \\\n", + "2279697 1457172 43 68171 65400.00 \n", + "2279698 1457172 43 70650 70020.00 \n", + "2279699 1457172 43 71702 71460.00 \n", + "2279700 414012 2 50387 50400.00 \n", + "2279701 414012 2 52181 51600.00 \n", + "\n", + " headway_minutes converted_rt_arrival actual_arrival_lag_min \n", + "2279697 60.00 2024-05-22 18:56:11 NaN \n", + "2279698 60.00 2024-05-22 19:37:30 41.32 \n", + "2279699 60.00 2024-05-22 19:55:02 17.53 \n", + "2279700 60.00 2024-05-22 13:59:47 NaN \n", + "2279701 60.00 2024-05-22 14:29:41 29.90 " + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trips_routes_times4.loc[\n", + " (trips_routes_times4.organization_name == \"City of Los Angeles\")\n", + " & (trips_routes_times4.route_id == \"30\")\n", + " & (trips_routes_times4.direction_id == 1)\n", + "][\n", + " [\n", + " \"stop_id\",\n", + " \"stop_sequence\",\n", + " \"rt_arrival_sec\",\n", + " \"scheduled_arrival_sec\",\n", + " \"headway_minutes\",\n", + " \"converted_rt_arrival\",\n", + " \"actual_arrival_lag_min\",\n", + " ]\n", + "].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "22694ef6-b9b6-47ba-bdad-a9ac6d679b91", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stop_idstop_sequencert_arrival_secscheduled_arrival_secheadway_minutesconverted_rt_arrivalactual_arrival_lag_min
2279235145717111818617760.00157.892024-05-22 05:03:06NaN
2279236145717111855918660.00157.892024-05-22 05:09:196.22
2279237145717112000719860.00157.892024-05-22 05:33:2724.13
2279238145717112064020460.00157.892024-05-22 05:44:0010.55
2279239145717112105321060.00157.892024-05-22 05:50:536.88
\n", + "
" + ], + "text/plain": [ + " stop_id stop_sequence rt_arrival_sec scheduled_arrival_sec \\\n", + "2279235 1457171 1 18186 17760.00 \n", + "2279236 1457171 1 18559 18660.00 \n", + "2279237 1457171 1 20007 19860.00 \n", + "2279238 1457171 1 20640 20460.00 \n", + "2279239 1457171 1 21053 21060.00 \n", + "\n", + " headway_minutes converted_rt_arrival actual_arrival_lag_min \n", + "2279235 157.89 2024-05-22 05:03:06 NaN \n", + "2279236 157.89 2024-05-22 05:09:19 6.22 \n", + "2279237 157.89 2024-05-22 05:33:27 24.13 \n", + "2279238 157.89 2024-05-22 05:44:00 10.55 \n", + "2279239 157.89 2024-05-22 05:50:53 6.88 " + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trips_routes_times4.loc[\n", + " (trips_routes_times4.organization_name == \"City of Los Angeles\")\n", + " & (trips_routes_times4.route_id == \"30\")\n", + " & (trips_routes_times4.direction_id == 0)\n", + "][\n", + " [\n", + " \"stop_id\",\n", + " \"stop_sequence\",\n", + " \"rt_arrival_sec\",\n", + " \"scheduled_arrival_sec\",\n", + " \"headway_minutes\",\n", + " \"converted_rt_arrival\",\n", + " \"actual_arrival_lag_min\",\n", + " ]\n", + "].head()" + ] + }, { "cell_type": "markdown", "id": "28362518-a54b-4f5d-a4d7-24a3d8ddefd0", @@ -2171,7 +2222,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 70, "id": "f0f0f4b7-fa64-4b01-a141-5dd78c59693b", "metadata": {}, "outputs": [], @@ -2189,7 +2240,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 71, "id": "7bea63e5-45d0-4d06-8c1c-fd34a69ffde7", "metadata": {}, "outputs": [], @@ -2201,19 +2252,262 @@ }, { "cell_type": "code", - "execution_count": 68, - "id": "c67c6299-68f0-414f-a9c1-e0b27511b9e5", + "execution_count": 72, + "id": "a1f0017c-c457-439b-9eac-86e1037dd3da", "metadata": {}, - "outputs": [], - "source": [ - "transit_matters_df1[\"bunched_y_n\"] = np.where(\n", - " transit_matters_df1[\"pct_actual_schd_headway\"] < 0.25, \"bunched\", \"not bunched\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 69, + "outputs": [ + { + "data": { + "text/plain": [ + "count 2871307.00\n", + "mean 1.46\n", + "std 4.40\n", + "min -249.09\n", + "1% 0.11\n", + "2% 0.22\n", + "5% 0.47\n", + "10% 0.65\n", + "50% 1.04\n", + "90% 2.24\n", + "95% 3.12\n", + "98% 4.96\n", + "99% 8.24\n", + "max 249.95\n", + "Name: pct_actual_schd_headway, dtype: float64" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_df1[\"pct_actual_schd_headway\"].describe(percentiles)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "f322436f-5f66-44c4-9b39-703cc6252a1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
service_datecaltrans_districtschedule_gtfs_dataset_keyfeed_keyorganization_nameroute_long_nameroute_typeroute_iddirection_idstop_idstop_sequencetrip_instance_keyrt_arrival_secscheduled_arrival_secheadway_minutesconverted_rt_arrivalconverted_schd_arrivaldelay_minrt_hourrt_minschd_hourschd_minconverted_schd_arrival_tooltipconverted_rt_arrival_tooltipactual_arrival_lag_minpct_actual_schd_headway
11885782024-05-2204 - Oakland7cc0cb1871dfd558f11a2885c145d1447f69c2fdaa134642f14064a0b64d1495City and County of San FranciscoCALIFORNIABus10.00163001377dcdcc4aa4e2be2467711f6b41baf7c29886355.005.782024-05-23 00:04:582024-05-21 23:59:151445.720423.0059.002024-05-21 23:59:152024-05-23 00:04:581443.65249.75
11887392024-05-2204 - Oakland7cc0cb1871dfd558f11a2885c145d1447f69c2fdaa134642f14064a0b64d1495City and County of San FranciscoCALIFORNIABus10.00163011177dcdcc4aa4e2be2467711f6b41baf7c29686277.005.782024-05-23 00:04:562024-05-21 23:57:571446.980423.0057.002024-05-21 23:57:572024-05-23 00:04:561444.80249.95
11892172024-05-2204 - Oakland7cc0cb1871dfd558f11a2885c145d1447f69c2fdaa134642f14064a0b64d1495City and County of San FranciscoCALIFORNIABus10.00163041277dcdcc4aa4e2be2467711f6b41baf7c29786316.005.782024-05-23 00:04:572024-05-21 23:58:361446.350423.0058.002024-05-21 23:58:362024-05-23 00:04:571444.17249.84
\n", + "
" + ], + "text/plain": [ + " service_date caltrans_district schedule_gtfs_dataset_key \\\n", + "1188578 2024-05-22 04 - Oakland 7cc0cb1871dfd558f11a2885c145d144 \n", + "1188739 2024-05-22 04 - Oakland 7cc0cb1871dfd558f11a2885c145d144 \n", + "1189217 2024-05-22 04 - Oakland 7cc0cb1871dfd558f11a2885c145d144 \n", + "\n", + " feed_key organization_name \\\n", + "1188578 7f69c2fdaa134642f14064a0b64d1495 City and County of San Francisco \n", + "1188739 7f69c2fdaa134642f14064a0b64d1495 City and County of San Francisco \n", + "1189217 7f69c2fdaa134642f14064a0b64d1495 City and County of San Francisco \n", + "\n", + " route_long_name route_type route_id direction_id stop_id \\\n", + "1188578 CALIFORNIA Bus 1 0.00 16300 \n", + "1188739 CALIFORNIA Bus 1 0.00 16301 \n", + "1189217 CALIFORNIA Bus 1 0.00 16304 \n", + "\n", + " stop_sequence trip_instance_key rt_arrival_sec \\\n", + "1188578 13 77dcdcc4aa4e2be2467711f6b41baf7c 298 \n", + "1188739 11 77dcdcc4aa4e2be2467711f6b41baf7c 296 \n", + "1189217 12 77dcdcc4aa4e2be2467711f6b41baf7c 297 \n", + "\n", + " scheduled_arrival_sec headway_minutes converted_rt_arrival \\\n", + "1188578 86355.00 5.78 2024-05-23 00:04:58 \n", + "1188739 86277.00 5.78 2024-05-23 00:04:56 \n", + "1189217 86316.00 5.78 2024-05-23 00:04:57 \n", + "\n", + " converted_schd_arrival delay_min rt_hour rt_min schd_hour \\\n", + "1188578 2024-05-21 23:59:15 1445.72 0 4 23.00 \n", + "1188739 2024-05-21 23:57:57 1446.98 0 4 23.00 \n", + "1189217 2024-05-21 23:58:36 1446.35 0 4 23.00 \n", + "\n", + " schd_min converted_schd_arrival_tooltip converted_rt_arrival_tooltip \\\n", + "1188578 59.00 2024-05-21 23:59:15 2024-05-23 00:04:58 \n", + "1188739 57.00 2024-05-21 23:57:57 2024-05-23 00:04:56 \n", + "1189217 58.00 2024-05-21 23:58:36 2024-05-23 00:04:57 \n", + "\n", + " actual_arrival_lag_min pct_actual_schd_headway \n", + "1188578 1443.65 249.75 \n", + "1188739 1444.80 249.95 \n", + "1189217 1444.17 249.84 " + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matters_df1.loc[transit_matters_df1.pct_actual_schd_headway > 249].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "39e2b5ca-f1a6-4533-9e8c-90d7be20f5cd", + "metadata": {}, + "outputs": [], + "source": [ + "# transit_matters_df1.loc[transit_matters_df1.pct_actual_schd_headway < 0].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "c67c6299-68f0-414f-a9c1-e0b27511b9e5", + "metadata": {}, + "outputs": [], + "source": [ + "transit_matters_df1[\"bunched_y_n\"] = np.where(\n", + " transit_matters_df1[\"pct_actual_schd_headway\"] < 0.25, \"bunched\", \"not bunched\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 76, "id": "d7d56ce7-66e9-4084-a725-a9eff7c4c5b2", "metadata": {}, "outputs": [ @@ -2225,7 +2519,7 @@ "Name: bunched_y_n, dtype: float64" ] }, - "execution_count": 69, + "execution_count": 76, "metadata": {}, "output_type": "execute_result" } @@ -2236,7 +2530,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 77, "id": "0998a245-3330-40a6-9213-d20333a3c3f0", "metadata": {}, "outputs": [ @@ -2254,7 +2548,7 @@ "Name: headway_minutes, dtype: float64" ] }, - "execution_count": 109, + "execution_count": 77, "metadata": {}, "output_type": "execute_result" } @@ -2300,7 +2594,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 78, "id": "e0706e7e-0d56-43b2-bf3c-4205e9277c64", "metadata": {}, "outputs": [], @@ -2318,7 +2612,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 79, "id": "2def9283-d995-4001-b412-0fa03a855cd5", "metadata": {}, "outputs": [], @@ -2333,7 +2627,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 80, "id": "dcd302cb-5f30-4318-8b28-cb29f6c376cd", "metadata": {}, "outputs": [ @@ -2345,7 +2639,7 @@ "Name: bunched_y_n, dtype: float64" ] }, - "execution_count": 73, + "execution_count": 80, "metadata": {}, "output_type": "execute_result" } @@ -2354,487 +2648,73 @@ "two_minutes_df.bunched_y_n.value_counts() / len(two_minutes_df)" ] }, - { - "cell_type": "markdown", - "id": "d37b1feb-1c2b-4ad4-9bee-94a45749d69b", - "metadata": {}, - "source": [ - "### Aggregate\n" - ] - }, { "cell_type": "code", - "execution_count": 74, - "id": "30bbbbad-4587-43e3-b5c7-632079f5a588", + "execution_count": 81, + "id": "326c60b2-509f-4320-aba8-b0c0645d30b5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(1598, 26)" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def bunched_not_bunched(\n", - " df: pd.DataFrame, bunched_y_n: str, groupby_cols: list, agg_col: str, agg_type: str\n", - ") -> pd.DataFrame:\n", - " df2 = df.loc[df.bunched_y_n == bunched_y_n].reset_index(drop=True)\n", - "\n", - " bunched_y_n = bunched_y_n.replace(\" \", \"_\")\n", - " agg1 = (df2.groupby(groupby_cols).agg({agg_col: agg_type}).reset_index()).rename(\n", - " columns={agg_col: bunched_y_n}\n", - " )\n", - " return agg1" + "two_minutes_df.loc[two_minutes_df.actual_arrival_lag_min < 0].shape" ] }, { "cell_type": "code", - "execution_count": 75, - "id": "a9a430ab-c1bf-49c0-aa55-9cfc29beb941", + "execution_count": 82, + "id": "86a14187-14e9-43e6-9b0f-d223e9720adb", "metadata": {}, "outputs": [], "source": [ - "def aggregation(\n", - " df: pd.DataFrame, groupby_cols: list, merge_cols: list, agg_col: str, agg_type: str\n", - ") -> pd.DataFrame:\n", - " # Find total trips that are bunched\n", - " bunched = bunched_not_bunched(df, \"bunched\", groupby_cols, agg_col, agg_type)\n", - " # Find total trips that are NOT bunched\n", - " not_bunched = bunched_not_bunched(\n", - " df, \"not bunched\", groupby_cols, agg_col, agg_type\n", - " )\n", - " # Merge\n", - " m1 = pd.merge(\n", - " bunched,\n", - " not_bunched,\n", - " on=merge_cols,\n", - " how=\"outer\",\n", - " )\n", - "\n", - " m1 = m1.fillna(0)\n", - " m1.bunched = m1.bunched.apply(np.int64)\n", - " m1.not_bunched = m1.not_bunched.apply(np.int64)\n", - " # display(m1.info())\n", - "\n", - " # Find all of the nunique columns\n", - " m1[f\"all_{agg_col}\"] = (m1.not_bunched + m1.bunched).astype(int)\n", - "\n", - " # Filter out any rows with only one trip of that groupby combo\n", - " # for that service date\n", - " m1 = m1.loc[m1[f\"all_{agg_col}\"] > 1].reset_index(drop=True)\n", - "\n", - " # Find % of bunched and not bunched\n", - " m1[\"pct_bunched\"] = (m1.bunched / m1[f\"all_{agg_col}\"]) * 100\n", - " m1[\"pct_not_bunched\"] = (m1.not_bunched / m1[f\"all_{agg_col}\"]) * 100\n", - "\n", - " return m1" + "negative_lags = two_minutes_df.loc[two_minutes_df.actual_arrival_lag_min < 0]" ] }, { - "cell_type": "markdown", - "id": "79a7e2fe-6b59-4924-b1b6-7ec8a5965c5a", + "cell_type": "code", + "execution_count": 83, + "id": "bf2baf66-c2b0-4f7c-8b80-a7daaea7eac2", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "count 1598.00\n", + "mean -1343.79\n", + "std 168.71\n", + "min -1440.00\n", + "1% -1439.53\n", + "2% -1438.97\n", + "5% -1436.46\n", + "10% -1433.69\n", + "50% -1410.76\n", + "90% -1097.13\n", + "95% -1071.67\n", + "98% -961.42\n", + "99% -807.49\n", + "max -68.13\n", + "Name: actual_arrival_lag_min, dtype: float64\n" + ] + } + ], "source": [ - "#### Aggregate for `operator-route-direction`: How many stops are bunched?" + "print(negative_lags.actual_arrival_lag_min.describe(percentiles))" ] }, { "cell_type": "code", - "execution_count": 76, - "id": "0991636f-9901-48f4-a851-dc502a71201d", - "metadata": {}, - "outputs": [], - "source": [ - "op_route_dir_groupby_cols = [\n", - " \"caltrans_district\",\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"feed_key\",\n", - " \"organization_name\",\n", - " \"route_long_name\",\n", - " \"route_type\",\n", - " \"route_id\",\n", - " \"direction_id\",\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "0e258012-fe88-47a5-a3d4-9b7940ff6407", - "metadata": {}, - "outputs": [], - "source": [ - "op_route_dir_m_cols = [\n", - " \"caltrans_district\",\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"feed_key\",\n", - " \"organization_name\",\n", - " \"route_long_name\",\n", - " \"route_type\",\n", - " \"route_id\",\n", - " \"direction_id\",\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "id": "6ae48601-c329-43cb-ae1d-b8053d2c1efa", - "metadata": {}, - "outputs": [], - "source": [ - "agg_operator_route_dir1 = aggregation(\n", - " transit_matters_df1,\n", - " op_route_dir_groupby_cols,\n", - " op_route_dir_m_cols,\n", - " \"stop_id\",\n", - " \"nunique\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "13ff57a9-4572-450d-b930-8873f05e4181", - "metadata": {}, - "source": [ - "#### 2nd way of aggregating is more accurate (see SF example)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "7f0e1797-2367-4a45-b85f-b4193f8d72cc", - "metadata": {}, - "outputs": [], - "source": [ - "agg_operator_route_dir2 = aggregation(\n", - " transit_matters_df1,\n", - " op_route_dir_groupby_cols,\n", - " op_route_dir_m_cols,\n", - " \"stop_id\",\n", - " \"count\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 133, - "id": "78a79c62-dba7-4306-9bb3-b3059bece607", - "metadata": {}, - "outputs": [], - "source": [ - "agg_operator_route_dir_2minutes = aggregation(\n", - " two_minutes_df,\n", - " op_route_dir_groupby_cols,\n", - " op_route_dir_m_cols,\n", - " \"stop_id\",\n", - " \"count\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "id": "aebc3bde-bce2-4745-90a1-810c9fa2157f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "count 3400.00\n", - "mean 0.68\n", - "std 3.71\n", - "min 0.00\n", - "1% 0.00\n", - "2% 0.00\n", - "5% 0.00\n", - "10% 0.00\n", - "50% 0.00\n", - "90% 0.88\n", - "95% 2.94\n", - "98% 7.03\n", - "99% 14.29\n", - "max 50.00\n", - "Name: pct_bunched, dtype: float64\n" - ] - } - ], - "source": [ - "print(agg_operator_route_dir_2minutes.pct_bunched.describe(percentiles))" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "id": "d8a65b6e-07ee-441e-a0e6-170a342cede0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "count 3400.00\n", - "mean 3.44\n", - "std 11.38\n", - "min 0.00\n", - "1% 0.00\n", - "2% 0.00\n", - "5% 0.00\n", - "10% 0.00\n", - "50% 0.00\n", - "90% 5.74\n", - "95% 29.41\n", - "98% 48.98\n", - "99% 61.39\n", - "max 85.28\n", - "Name: pct_bunched, dtype: float64\n" - ] - } - ], - "source": [ - "print(agg_operator_route_dir2.pct_bunched.describe(percentiles))" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "id": "06494510-9807-4d78-a532-16f5e8b3e3d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "execution_count": 124, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sf_30_test = agg_operator_route_dir2.loc[\n", - " (agg_operator_route_dir2.organization_name == \"City and County of San Francisco\")\n", - " & (agg_operator_route_dir2.route_id == \"30\")\n", - "]\n", - "\n", - "sf_30_test_m = pd.melt(\n", - " sf_30_test,\n", - " id_vars=[\"organization_name\", \"route_id\", \"direction_id\"],\n", - " value_vars=[\"pct_bunched\", \"pct_not_bunched\"],\n", - ").rename(columns={\"value\": \"percentage\"})\n", - "\n", - "alt.Chart(sf_30_test_m).mark_bar().encode(\n", - " x=alt.X(\n", - " \"variable:O\",\n", - " axis=alt.Axis(labelAngle=-45),\n", - " ),\n", - " y=alt.Y(\"percentage:Q\", scale=alt.Scale(domain=[0, 100])),\n", - " color=alt.Color(\n", - " \"variable:N\",\n", - " title=\"Bunched or Not\",\n", - " scale=alt.Scale(range=color_dict[\"tri_color\"]),\n", - " ),\n", - " column=\"direction_id:N\",\n", - " tooltip=list(sf_30_test_m.columns),\n", - ").properties(\n", - " title={\n", - " \"text\": \"Total Occurences of Bunching for Operator-Route-Direction\",\n", - " },\n", - " width=200,\n", - " height=250,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 136, - "id": "5f37d405-8be4-443a-a456-f2d91d2c15ac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.Chart(...)" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sf_30_test2 = agg_operator_route_dir_2minutes.loc[\n", - " (\n", - " agg_operator_route_dir_2minutes.organization_name\n", - " == \"City and County of San Francisco\"\n", - " )\n", - " & (agg_operator_route_dir_2minutes.route_id == \"30\")\n", - "]\n", - "\n", - "sf_30_test_m2 = pd.melt(\n", - " sf_30_test2,\n", - " id_vars=[\"organization_name\", \"route_id\", \"direction_id\"],\n", - " value_vars=[\"pct_bunched\", \"pct_not_bunched\"],\n", - ").rename(columns={\"value\": \"percentage\"})\n", - "\n", - "alt.Chart(sf_30_test_m2).mark_bar().encode(\n", - " x=alt.X(\n", - " \"variable:O\",\n", - " axis=alt.Axis(labelAngle=-45),\n", - " ),\n", - " y=alt.Y(\"percentage:Q\", scale=alt.Scale(domain=[0, 100])),\n", - " color=alt.Color(\n", - " \"variable:N\",\n", - " title=\"Bunched or Not\",\n", - " scale=alt.Scale(range=color_dict[\"tri_color\"]),\n", - " ),\n", - " column=\"direction_id:N\",\n", - " tooltip=list(sf_30_test_m1.columns),\n", - ").properties(\n", - " title={\n", - " \"text\": \"Total Occurences of Bunching for Operator-Route-Direction\",\n", - " },\n", - " width=200,\n", - " height=250,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 125, - "id": "182c6867-2fee-44f4-a77e-02ec76b904bf", + "execution_count": 84, + "id": "62813095-093f-49bf-913d-68f0d465e3f2", "metadata": {}, "outputs": [ { @@ -2858,6 +2738,7 @@ " \n", " \n", " \n", + " service_date\n", " caltrans_district\n", " schedule_gtfs_dataset_key\n", " feed_key\n", @@ -2866,276 +2747,336 @@ " route_type\n", " route_id\n", " direction_id\n", - " bunched\n", - " not_bunched\n", - " all_stop_id\n", - " pct_bunched\n", - " pct_not_bunched\n", + " stop_id\n", + " stop_sequence\n", + " trip_instance_key\n", + " rt_arrival_sec\n", + " scheduled_arrival_sec\n", + " headway_minutes\n", + " converted_rt_arrival\n", + " converted_schd_arrival\n", + " delay_min\n", + " rt_hour\n", + " rt_min\n", + " schd_hour\n", + " schd_min\n", + " converted_schd_arrival_tooltip\n", + " converted_rt_arrival_tooltip\n", + " actual_arrival_lag_min\n", + " bunched_y_n\n", " \n", " \n", " \n", " \n", - " 157\n", - " 04 - Oakland\n", - " 7cc0cb1871dfd558f11a2885c145d144\n", - " 7f69c2fdaa134642f14064a0b64d1495\n", - " City and County of San Francisco\n", - " STOCKTON\n", + " 122816\n", + " 2024-05-22\n", + " 07 - Los Angeles\n", + " 0666caf3ec1ecc96b74f4477ee4bc939\n", + " 608992664173210532aa3e6cc573be2f\n", + " Los Angeles County Metropolitan Transportation Authority\n", + " Metro Local Line\n", " Bus\n", - " 30\n", - " 0.00\n", - " 222\n", - " 2993\n", - " 3215\n", - " 6.91\n", - " 93.09\n", + " 14-13172\n", + " 1.00\n", + " 13406\n", + " 77\n", + " 7c0fff930dc74bb927d64aa10dacc395\n", + " 62427\n", + " 62520.00\n", + " 12.63\n", + " 2024-05-22 17:20:27\n", + " 2024-05-22 17:22:00\n", + " -1.55\n", + " 17\n", + " 20\n", + " 17.00\n", + " 22.00\n", + " 2024-05-22 17:22:00\n", + " 2024-05-22 17:20:27\n", + " -402.10\n", + " not bunched\n", " \n", " \n", - " 158\n", - " 04 - Oakland\n", - " 7cc0cb1871dfd558f11a2885c145d144\n", - " 7f69c2fdaa134642f14064a0b64d1495\n", - " City and County of San Francisco\n", - " STOCKTON\n", + " 131788\n", + " 2024-05-22\n", + " 07 - Los Angeles\n", + " 0666caf3ec1ecc96b74f4477ee4bc939\n", + " 608992664173210532aa3e6cc573be2f\n", + " Los Angeles County Metropolitan Transportation Authority\n", + " Metro Local Line\n", " Bus\n", - " 30\n", + " 150-13172\n", " 1.00\n", - " 104\n", - " 3102\n", - " 3206\n", - " 3.24\n", - " 96.76\n", + " 16804\n", + " 10\n", + " 948ca978e46cd771aa0429d9120a96a8\n", + " 86376\n", + " 86400.00\n", + " 20.83\n", + " 2024-05-21 23:59:36\n", + " 2024-05-23 00:00:00\n", + " -1440.40\n", + " 23\n", + " 59\n", + " 0.00\n", + " 0.00\n", + " 2024-05-23 00:00:00\n", + " 2024-05-21 23:59:36\n", + " -290.47\n", + " not bunched\n", " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - " caltrans_district schedule_gtfs_dataset_key \\\n", - "157 04 - Oakland 7cc0cb1871dfd558f11a2885c145d144 \n", - "158 04 - Oakland 7cc0cb1871dfd558f11a2885c145d144 \n", - "\n", - " feed_key organization_name \\\n", - "157 7f69c2fdaa134642f14064a0b64d1495 City and County of San Francisco \n", - "158 7f69c2fdaa134642f14064a0b64d1495 City and County of San Francisco \n", - "\n", - " route_long_name route_type route_id direction_id bunched not_bunched \\\n", - "157 STOCKTON Bus 30 0.00 222 2993 \n", - "158 STOCKTON Bus 30 1.00 104 3102 \n", - "\n", - " all_stop_id pct_bunched pct_not_bunched \n", - "157 3215 6.91 93.09 \n", - "158 3206 3.24 96.76 " - ] - }, - "execution_count": 125, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sf_30_test" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "id": "4ae46a53-7c88-4421-98ee-8261f38f31ac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
caltrans_districtschedule_gtfs_dataset_keyfeed_keyorganization_nameroute_long_nameroute_typeroute_iddirection_idbunchednot_bunchedall_stop_idpct_bunchedpct_not_bunched
3873252024-05-2207 - Los Angeles0666caf3ec1ecc96b74f4477ee4bc939608992664173210532aa3e6cc573be2fLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus233-131720.00223162583bf317904be0fc4f3727bd7698b4f48622486460.0010.432024-05-21 23:57:042024-05-23 00:01:00-1443.9323570.001.002024-05-23 00:01:002024-05-21 23:57:04-121.28not bunched
10504 - Oakland7cc0cb1871dfd558f11a2885c145d1447f69c2fdaa134642f14064a0b64d1495City and County of San FranciscoSTOCKTON3873692024-05-2207 - Los Angeles0666caf3ec1ecc96b74f4477ee4bc939608992664173210532aa3e6cc573be2fLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus30233-131720.00225861583bf317904be0fc4f3727bd7698b4f48620086400.0010.432024-05-21 23:56:402024-05-23 00:00:00-1443.3323560.000.00226298932157.0392.972024-05-23 00:00:002024-05-21 23:56:40-121.13not bunched
10604 - Oakland7cc0cb1871dfd558f11a2885c145d1447f69c2fdaa134642f14064a0b64d1495City and County of San FranciscoSTOCKTON3877642024-05-2207 - Los Angeles0666caf3ec1ecc96b74f4477ee4bc939608992664173210532aa3e6cc573be2fLos Angeles County Metropolitan Transportation AuthorityMetro Local LineBus30233-131720.00434863583bf317904be0fc4f3727bd7698b4f48625186460.0010.432024-05-21 23:57:312024-05-23 00:01:00-1443.4823570.001.00104310232063.2496.762024-05-23 00:01:002024-05-21 23:57:31-121.43not bunched
\n", "
" ], "text/plain": [ - " caltrans_district schedule_gtfs_dataset_key \\\n", - "105 04 - Oakland 7cc0cb1871dfd558f11a2885c145d144 \n", - "106 04 - Oakland 7cc0cb1871dfd558f11a2885c145d144 \n", + " service_date caltrans_district schedule_gtfs_dataset_key \\\n", + "122816 2024-05-22 07 - Los Angeles 0666caf3ec1ecc96b74f4477ee4bc939 \n", + "131788 2024-05-22 07 - Los Angeles 0666caf3ec1ecc96b74f4477ee4bc939 \n", + "387325 2024-05-22 07 - Los Angeles 0666caf3ec1ecc96b74f4477ee4bc939 \n", + "387369 2024-05-22 07 - Los Angeles 0666caf3ec1ecc96b74f4477ee4bc939 \n", + "387764 2024-05-22 07 - Los Angeles 0666caf3ec1ecc96b74f4477ee4bc939 \n", "\n", - " feed_key organization_name \\\n", - "105 7f69c2fdaa134642f14064a0b64d1495 City and County of San Francisco \n", - "106 7f69c2fdaa134642f14064a0b64d1495 City and County of San Francisco \n", - "\n", - " route_long_name route_type route_id direction_id bunched not_bunched \\\n", - "105 STOCKTON Bus 30 0.00 226 2989 \n", - "106 STOCKTON Bus 30 1.00 104 3102 \n", - "\n", - " all_stop_id pct_bunched pct_not_bunched \n", - "105 3215 7.03 92.97 \n", - "106 3206 3.24 96.76 " - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sf_30_test2" - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "id": "bff8b8a2-8cd1-407b-adf0-87e7052adc26", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "not bunched 2993\n", - "bunched 222\n", - "Name: bunched_y_n, dtype: int64" - ] - }, - "execution_count": 131, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "transit_matters_df1.loc[\n", - " (transit_matters_df1.organization_name == \"City and County of San Francisco\")\n", - " & (transit_matters_df1.route_id == \"30\")\n", - " & (transit_matters_df1.direction_id == 0)\n", - "].bunched_y_n.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 132, - "id": "653cb5a3-84ad-4cbd-9662-ed8171a945b3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "not bunched 3102\n", - "bunched 104\n", - "Name: bunched_y_n, dtype: int64" + " feed_key \\\n", + "122816 608992664173210532aa3e6cc573be2f \n", + "131788 608992664173210532aa3e6cc573be2f \n", + "387325 608992664173210532aa3e6cc573be2f \n", + "387369 608992664173210532aa3e6cc573be2f \n", + "387764 608992664173210532aa3e6cc573be2f \n", + "\n", + " organization_name \\\n", + "122816 Los Angeles County Metropolitan Transportation Authority \n", + "131788 Los Angeles County Metropolitan Transportation Authority \n", + "387325 Los Angeles County Metropolitan Transportation Authority \n", + "387369 Los Angeles County Metropolitan Transportation Authority \n", + "387764 Los Angeles County Metropolitan Transportation Authority \n", + "\n", + " route_long_name route_type route_id direction_id stop_id \\\n", + "122816 Metro Local Line Bus 14-13172 1.00 13406 \n", + "131788 Metro Local Line Bus 150-13172 1.00 16804 \n", + "387325 Metro Local Line Bus 233-13172 0.00 2231 \n", + "387369 Metro Local Line Bus 233-13172 0.00 2258 \n", + "387764 Metro Local Line Bus 233-13172 0.00 4348 \n", + "\n", + " stop_sequence trip_instance_key rt_arrival_sec \\\n", + "122816 77 7c0fff930dc74bb927d64aa10dacc395 62427 \n", + "131788 10 948ca978e46cd771aa0429d9120a96a8 86376 \n", + "387325 62 583bf317904be0fc4f3727bd7698b4f4 86224 \n", + "387369 61 583bf317904be0fc4f3727bd7698b4f4 86200 \n", + "387764 63 583bf317904be0fc4f3727bd7698b4f4 86251 \n", + "\n", + " scheduled_arrival_sec headway_minutes converted_rt_arrival \\\n", + "122816 62520.00 12.63 2024-05-22 17:20:27 \n", + "131788 86400.00 20.83 2024-05-21 23:59:36 \n", + "387325 86460.00 10.43 2024-05-21 23:57:04 \n", + "387369 86400.00 10.43 2024-05-21 23:56:40 \n", + "387764 86460.00 10.43 2024-05-21 23:57:31 \n", + "\n", + " converted_schd_arrival delay_min rt_hour rt_min schd_hour \\\n", + "122816 2024-05-22 17:22:00 -1.55 17 20 17.00 \n", + "131788 2024-05-23 00:00:00 -1440.40 23 59 0.00 \n", + "387325 2024-05-23 00:01:00 -1443.93 23 57 0.00 \n", + "387369 2024-05-23 00:00:00 -1443.33 23 56 0.00 \n", + "387764 2024-05-23 00:01:00 -1443.48 23 57 0.00 \n", + "\n", + " schd_min converted_schd_arrival_tooltip converted_rt_arrival_tooltip \\\n", + "122816 22.00 2024-05-22 17:22:00 2024-05-22 17:20:27 \n", + "131788 0.00 2024-05-23 00:00:00 2024-05-21 23:59:36 \n", + "387325 1.00 2024-05-23 00:01:00 2024-05-21 23:57:04 \n", + "387369 0.00 2024-05-23 00:00:00 2024-05-21 23:56:40 \n", + "387764 1.00 2024-05-23 00:01:00 2024-05-21 23:57:31 \n", + "\n", + " actual_arrival_lag_min bunched_y_n \n", + "122816 -402.10 not bunched \n", + "131788 -290.47 not bunched \n", + "387325 -121.28 not bunched \n", + "387369 -121.13 not bunched \n", + "387764 -121.43 not bunched " ] }, - "execution_count": 132, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "transit_matters_df1.loc[\n", - " (transit_matters_df1.organization_name == \"City and County of San Francisco\")\n", - " & (transit_matters_df1.route_id == \"30\")\n", - " & (transit_matters_df1.direction_id == 1)\n", - "].bunched_y_n.value_counts()" + "negative_lags.loc[(negative_lags.actual_arrival_lag_min > -807)].head(5)" ] }, { "cell_type": "markdown", - "id": "f8da417d-b975-4e66-9a37-64980318ea41", + "id": "d37b1feb-1c2b-4ad4-9bee-94a45749d69b", "metadata": {}, "source": [ - "#### Aggregate for `operator-route-direction-rt_arrival_hour`\n", - "* Testing a couple of options to see the difference.\n", - "* Do we care how many stops experience bunching or how many trips experience bunching?\n", - "* If one stop for a trip is bunched, the whole trip is considered bunched...Is that too \"draconian\"?" + "### Aggregate\n" ] }, { "cell_type": "code", - "execution_count": 107, - "id": "aea6be3d-f024-41c0-806c-b675f19697e0", + "execution_count": 93, + "id": "30bbbbad-4587-43e3-b5c7-632079f5a588", "metadata": {}, "outputs": [], "source": [ - "op_route_dir_hour_groupby_cols = [\n", - " \"caltrans_district\",\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"feed_key\",\n", - " \"organization_name\",\n", - " \"route_long_name\",\n", - " \"route_type\",\n", - " \"route_id\",\n", - " \"direction_id\",\n", - " \"rt_hour\",\n", - "]" + "def bunched_not_bunched(\n", + " df: pd.DataFrame, bunched_y_n: str, groupby_cols: list, agg_col: str, agg_type: str\n", + ") -> pd.DataFrame:\n", + " df2 = df.loc[df.bunched_y_n == bunched_y_n].reset_index(drop=True)\n", + "\n", + " bunched_y_n = bunched_y_n.replace(\" \", \"_\")\n", + " agg1 = (df2.groupby(groupby_cols).agg({agg_col: agg_type}).reset_index()).rename(\n", + " columns={agg_col: bunched_y_n}\n", + " )\n", + " return agg1" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "a9a430ab-c1bf-49c0-aa55-9cfc29beb941", + "metadata": {}, + "outputs": [], + "source": [ + "def aggregation(\n", + " df: pd.DataFrame, groupby_cols: list, merge_cols: list, agg_col: str, agg_type: str\n", + ") -> pd.DataFrame:\n", + " # Find total trips that are bunched\n", + " bunched = bunched_not_bunched(df, \"bunched\", groupby_cols, agg_col, agg_type)\n", + " # Find total trips that are NOT bunched\n", + " not_bunched = bunched_not_bunched(\n", + " df, \"not bunched\", groupby_cols, agg_col, agg_type\n", + " )\n", + " # Merge\n", + " m1 = pd.merge(\n", + " bunched,\n", + " not_bunched,\n", + " on=merge_cols,\n", + " how=\"outer\",\n", + " )\n", + "\n", + " m1 = m1.fillna(0)\n", + " m1.bunched = m1.bunched.apply(np.int64)\n", + " m1.not_bunched = m1.not_bunched.apply(np.int64)\n", + " # display(m1.info())\n", + "\n", + " # Find all of the nunique columns\n", + " m1[f\"all_{agg_col}\"] = (m1.not_bunched + m1.bunched).astype(int)\n", + "\n", + " # Filter out any rows with only one trip of that groupby combo\n", + " # for that service date\n", + " m1 = m1.loc[m1[f\"all_{agg_col}\"] > 1].reset_index(drop=True)\n", + "\n", + " # Find % of bunched and not bunched\n", + " m1[\"pct_bunched\"] = (m1.bunched / m1[f\"all_{agg_col}\"]) * 100\n", + " m1[\"pct_not_bunched\"] = (m1.not_bunched / m1[f\"all_{agg_col}\"]) * 100\n", + "\n", + " return m1" + ] + }, + { + "cell_type": "markdown", + "id": "4b685b31-9bad-4466-9cab-29ae4c1b88ec", + "metadata": {}, + "source": [ + "#### Transit Matters Method: Stop Grain\n", + "* [2023 report](https://static1.squarespace.com/static/533b9a24e4b01d79d0ae4376/t/6617ec40675223398aac12bf/1712843871514/TransitMatters-Bus-Bunching-Reports-Oct-2023)" ] }, { "cell_type": "code", - "execution_count": 108, - "id": "822c32e8-80b0-4981-b550-2949927f4413", + "execution_count": 96, + "id": "1ccd3b0d-eeca-463f-9fe9-5c8e174d4ec5", "metadata": {}, "outputs": [], "source": [ - "op_route_dir_hour_m_cols = [\n", + "transit_groupby_cols = [\n", " \"caltrans_district\",\n", " \"schedule_gtfs_dataset_key\",\n", " \"feed_key\",\n", @@ -3144,37 +3085,21 @@ " \"route_type\",\n", " \"route_id\",\n", " \"direction_id\",\n", - " \"rt_hour\",\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "id": "d999d94d-3199-4333-8c3f-b0de3b458271", - "metadata": {}, - "outputs": [], - "source": [ - "agg_operator_route_dir_hour = aggregation(\n", - " transit_matters_df1,\n", - " op_route_dir_hour_groupby_cols,\n", - " op_route_dir_hour_m_cols,\n", " \"stop_id\",\n", - " \"nunique\",\n", - ")" + "]" ] }, { "cell_type": "code", - "execution_count": 112, - "id": "70be1dc8-0f7e-43f6-a485-1c21305b9fe2", + "execution_count": 97, + "id": "0454a30d-10dc-4117-9861-1fd2e9a3a0f7", "metadata": {}, "outputs": [], "source": [ - "agg_operator_route_dir_hour2 = aggregation(\n", + "transit_matter_ag = aggregation(\n", " transit_matters_df1,\n", - " op_route_dir_hour_groupby_cols,\n", - " op_route_dir_hour_m_cols,\n", + " transit_groupby_cols,\n", + " transit_groupby_cols,\n", " \"trip_instance_key\",\n", " \"nunique\",\n", ")" @@ -3182,49 +3107,9 @@ }, { "cell_type": "code", - "execution_count": 140, - "id": "acd00da6-180e-43ad-ab71-18a74c9e9895", - "metadata": {}, - "outputs": [], - "source": [ - "sf_49 = transit_matters_df1.loc[\n", - " (transit_matters_df1.organization_name == \"City and County of San Francisco\")\n", - " & (transit_matters_df1.route_id == \"49\")\n", - " & (transit_matters_df1.direction_id == 1)\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 147, - "id": "edfca9e0-56e2-43c4-8143-564a5ec2e20e", + "execution_count": 98, + "id": "ead5c03f-639d-4330-b055-ce93ff3c1870", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "not bunched 0.88\n", - "bunched 0.12\n", - "Name: bunched_y_n, dtype: float64" - ] - }, - "execution_count": 147, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sf_49.bunched_y_n.value_counts() / len(sf_49)" - ] - }, - { - "cell_type": "code", - "execution_count": 142, - "id": "95db972f-2509-4998-8412-bf06357c0c72", - "metadata": { - "scrolled": true, - "tags": [] - }, "outputs": [ { "data": { @@ -3247,294 +3132,227 @@ " \n", " \n", " \n", - " \n", - " stop_id\n", - " \n", - " \n", - " rt_hour\n", - " bunched_y_n\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 0\n", - " bunched\n", - " 15\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 4\n", - " not bunched\n", - " 9\n", - " \n", - " \n", - " 5\n", - " bunched\n", - " 1\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 6\n", - " bunched\n", - " 21\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 7\n", - " bunched\n", - " 21\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 8\n", - " bunched\n", - " 24\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 9\n", - " bunched\n", - " 27\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 10\n", - " bunched\n", - " 24\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 11\n", - " bunched\n", - " 32\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 12\n", - " bunched\n", - " 22\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 13\n", - " bunched\n", - " 24\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 14\n", - " bunched\n", - " 24\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 15\n", - " bunched\n", - " 29\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 16\n", - " bunched\n", - " 22\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 17\n", - " bunched\n", - " 25\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 18\n", - " bunched\n", - " 14\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 19\n", - " bunched\n", - " 16\n", - " \n", - " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 20\n", + " caltrans_district\n", + " schedule_gtfs_dataset_key\n", + " feed_key\n", + " organization_name\n", + " route_long_name\n", + " route_type\n", + " route_id\n", + " direction_id\n", + " stop_id\n", " bunched\n", - " 11\n", + " not_bunched\n", + " all_trip_instance_key\n", + " pct_bunched\n", + " pct_not_bunched\n", " \n", + " \n", + " \n", " \n", - " not bunched\n", - " 35\n", + " 0\n", + " 03 - Marysville\n", + " 3c62ad6ee589d56eca915ce291a5df0a\n", + " 9097af5e9a0e3909ca754a46ca037919\n", + " Yolo County Transportation District\n", + " ROUTE 215 EB\n", + " Bus\n", + " eb077710-0df4-4c7a-828e-914c6769117d\n", + " 0.00\n", + " 12789766-807c-4e66-9f92-f18123353300\n", + " 1\n", + " 9\n", + " 10\n", + " 10.00\n", + " 90.00\n", " \n", " \n", - " 21\n", - " bunched\n", + " 1\n", + " 03 - Marysville\n", + " 3c62ad6ee589d56eca915ce291a5df0a\n", + " 9097af5e9a0e3909ca754a46ca037919\n", + " Yolo County Transportation District\n", + " ROUTE 215 EB\n", + " Bus\n", + " eb077710-0df4-4c7a-828e-914c6769117d\n", + " 0.00\n", + " 1842225b-1d46-4127-a9ba-2aab24a1f30b\n", + " 1\n", " 7\n", + " 8\n", + " 12.50\n", + " 87.50\n", " \n", " \n", - " not bunched\n", - " 35\n", - " \n", - " \n", - " 22\n", - " bunched\n", - " 5\n", - " \n", - " \n", - " not bunched\n", - " 35\n", + " 2\n", + " 03 - Marysville\n", + " 3c62ad6ee589d56eca915ce291a5df0a\n", + " 9097af5e9a0e3909ca754a46ca037919\n", + " Yolo County Transportation District\n", + " ROUTE 215 EB\n", + " Bus\n", + " eb077710-0df4-4c7a-828e-914c6769117d\n", + " 0.00\n", + " 339cf3c9-e4e1-4144-b60f-ed32f130c9bc\n", + " 1\n", + " 9\n", + " 10\n", + " 10.00\n", + " 90.00\n", " \n", " \n", - " 23\n", - " bunched\n", + " 3\n", + " 03 - Marysville\n", + " 3c62ad6ee589d56eca915ce291a5df0a\n", + " 9097af5e9a0e3909ca754a46ca037919\n", + " Yolo County Transportation District\n", + " ROUTE 215 EB\n", + " Bus\n", + " eb077710-0df4-4c7a-828e-914c6769117d\n", + " 0.00\n", + " 355b416e-8231-4c4d-b1d8-6c150bb8d0f2\n", " 1\n", + " 7\n", + " 8\n", + " 12.50\n", + " 87.50\n", " \n", " \n", - " not bunched\n", - " 35\n", + " 4\n", + " 03 - Marysville\n", + " 3c62ad6ee589d56eca915ce291a5df0a\n", + " 9097af5e9a0e3909ca754a46ca037919\n", + " Yolo County Transportation District\n", + " ROUTE 215 EB\n", + " Bus\n", + " eb077710-0df4-4c7a-828e-914c6769117d\n", + " 0.00\n", + " 39d50615-06d2-4a3d-a7aa-cbc50f9ee924\n", + " 1\n", + " 8\n", + " 9\n", + " 11.11\n", + " 88.89\n", " \n", " \n", "\n", "" ], "text/plain": [ - " stop_id\n", - "rt_hour bunched_y_n \n", - "0 bunched 15\n", - " not bunched 35\n", - "4 not bunched 9\n", - "5 bunched 1\n", - " not bunched 35\n", - "6 bunched 21\n", - " not bunched 35\n", - "7 bunched 21\n", - " not bunched 35\n", - "8 bunched 24\n", - " not bunched 35\n", - "9 bunched 27\n", - " not bunched 35\n", - "10 bunched 24\n", - " not bunched 35\n", - "11 bunched 32\n", - " not bunched 35\n", - "12 bunched 22\n", - " not bunched 35\n", - "13 bunched 24\n", - " not bunched 35\n", - "14 bunched 24\n", - " not bunched 35\n", - "15 bunched 29\n", - " not bunched 35\n", - "16 bunched 22\n", - " not bunched 35\n", - "17 bunched 25\n", - " not bunched 35\n", - "18 bunched 14\n", - " not bunched 35\n", - "19 bunched 16\n", - " not bunched 35\n", - "20 bunched 11\n", - " not bunched 35\n", - "21 bunched 7\n", - " not bunched 35\n", - "22 bunched 5\n", - " not bunched 35\n", - "23 bunched 1\n", - " not bunched 35" + " caltrans_district schedule_gtfs_dataset_key \\\n", + "0 03 - Marysville 3c62ad6ee589d56eca915ce291a5df0a \n", + "1 03 - Marysville 3c62ad6ee589d56eca915ce291a5df0a \n", + "2 03 - Marysville 3c62ad6ee589d56eca915ce291a5df0a \n", + "3 03 - Marysville 3c62ad6ee589d56eca915ce291a5df0a \n", + "4 03 - Marysville 3c62ad6ee589d56eca915ce291a5df0a \n", + "\n", + " feed_key organization_name \\\n", + "0 9097af5e9a0e3909ca754a46ca037919 Yolo County Transportation District \n", + "1 9097af5e9a0e3909ca754a46ca037919 Yolo County Transportation District \n", + "2 9097af5e9a0e3909ca754a46ca037919 Yolo County Transportation District \n", + "3 9097af5e9a0e3909ca754a46ca037919 Yolo County Transportation District \n", + "4 9097af5e9a0e3909ca754a46ca037919 Yolo County Transportation District \n", + "\n", + " route_long_name route_type route_id \\\n", + "0 ROUTE 215 EB Bus eb077710-0df4-4c7a-828e-914c6769117d \n", + "1 ROUTE 215 EB Bus eb077710-0df4-4c7a-828e-914c6769117d \n", + "2 ROUTE 215 EB Bus eb077710-0df4-4c7a-828e-914c6769117d \n", + "3 ROUTE 215 EB Bus eb077710-0df4-4c7a-828e-914c6769117d \n", + "4 ROUTE 215 EB Bus eb077710-0df4-4c7a-828e-914c6769117d \n", + "\n", + " direction_id stop_id bunched not_bunched \\\n", + "0 0.00 12789766-807c-4e66-9f92-f18123353300 1 9 \n", + "1 0.00 1842225b-1d46-4127-a9ba-2aab24a1f30b 1 7 \n", + "2 0.00 339cf3c9-e4e1-4144-b60f-ed32f130c9bc 1 9 \n", + "3 0.00 355b416e-8231-4c4d-b1d8-6c150bb8d0f2 1 7 \n", + "4 0.00 39d50615-06d2-4a3d-a7aa-cbc50f9ee924 1 8 \n", + "\n", + " all_trip_instance_key pct_bunched pct_not_bunched \n", + "0 10 10.00 90.00 \n", + "1 8 12.50 87.50 \n", + "2 10 10.00 90.00 \n", + "3 8 12.50 87.50 \n", + "4 9 11.11 88.89 " ] }, - "execution_count": 142, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sf_49.groupby(\n", - " [\n", - " \"rt_hour\",\n", - " \"bunched_y_n\",\n", - " ]\n", - ").agg({\"stop_id\": \"nunique\"})" + "transit_matter_ag.head()" ] }, { "cell_type": "code", - "execution_count": 150, - "id": "3d4fe456-8653-40c0-a993-987409c4cbcf", + "execution_count": 100, + "id": "8163469e-bec9-4ccd-8af3-67f31612c793", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.02857142857142857" + "(99015, 14)" ] }, - "execution_count": 150, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "1 / 35" + "transit_matter_ag.shape" ] }, { "cell_type": "code", - "execution_count": 144, - "id": "c37e285c-3087-435a-8a95-8f57170b85ee", - "metadata": { - "scrolled": true, - "tags": [] - }, + "execution_count": 101, + "id": "2ed248e8-4027-4574-8aa3-dd7fdd5b7b7a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 99015.00\n", + "mean 2.80\n", + "std 10.08\n", + "min 0.00\n", + "1% 0.00\n", + "2% 0.00\n", + "5% 0.00\n", + "10% 0.00\n", + "50% 0.00\n", + "90% 5.26\n", + "95% 11.88\n", + "98% 50.00\n", + "99% 50.00\n", + "max 91.67\n", + "Name: pct_bunched, dtype: float64" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transit_matter_ag.pct_bunched.describe(percentiles)" + ] + }, + { + "cell_type": "markdown", + "id": "23f68cdd-d8d6-4e9f-aff3-c6921a7918bd", + "metadata": {}, + "source": [ + "##### City of LA Route 30 is very bunched b/c of the scheduled headway which is much higher than waht is happening in the RT data." + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "5d42aa3a-fe0a-4d77-8992-291ea50a95bc", + "metadata": {}, "outputs": [ { "data": { @@ -3557,510 +3375,645 @@ " \n", " \n", " \n", - " \n", - " trip_instance_key\n", - " \n", - " \n", - " rt_hour\n", - " bunched_y_n\n", - " \n", + " caltrans_district\n", + " schedule_gtfs_dataset_key\n", + " feed_key\n", + " organization_name\n", + " route_long_name\n", + " route_type\n", + " route_id\n", + " direction_id\n", + " stop_id\n", + " bunched\n", + " not_bunched\n", + " all_trip_instance_key\n", + " pct_bunched\n", + " pct_not_bunched\n", " \n", " \n", " \n", " \n", - " 0\n", - " bunched\n", - " 4\n", - " \n", - " \n", - " not bunched\n", - " 8\n", - " \n", - " \n", - " 4\n", - " not bunched\n", - " 2\n", - " \n", - " \n", - " 5\n", - " bunched\n", - " 1\n", - " \n", - " \n", - " not bunched\n", - " 7\n", - " \n", - " \n", - " 6\n", - " bunched\n", - " 7\n", - " \n", - " \n", - " not bunched\n", - " 13\n", - " \n", - " \n", - " 7\n", - " bunched\n", - " 13\n", - " \n", - " \n", - " not bunched\n", - " 16\n", - " \n", - " \n", - " 8\n", - " bunched\n", + " 14599\n", + " 07 - Los Angeles\n", + " cc53a0dbf5df90e3009b9cb5d89d80ba\n", + " 2cfdf0e33e9229d6b0ad124d956f5856\n", + " City of Los Angeles\n", + " CE422\n", + " Bus\n", + " 30\n", + " 0.00\n", + " 422300\n", " 11\n", - " \n", - " \n", - " not bunched\n", - " 16\n", - " \n", - " \n", - " 9\n", - " bunched\n", + " 1\n", " 12\n", + " 91.67\n", + " 8.33\n", " \n", " \n", - " not bunched\n", - " 17\n", - " \n", - " \n", - " 10\n", - " bunched\n", - " 11\n", - " \n", - " \n", - " not bunched\n", - " 17\n", - " \n", - " \n", - " 11\n", - " bunched\n", - " 14\n", - " \n", - " \n", - " not bunched\n", - " 18\n", - " \n", - " \n", - " 12\n", - " bunched\n", - " 14\n", - " \n", - " \n", - " not bunched\n", - " 17\n", - " \n", - " \n", - " 13\n", - " bunched\n", - " 11\n", - " \n", - " \n", - " not bunched\n", - " 18\n", - " \n", - " \n", - " 14\n", - " bunched\n", + " 14601\n", + " 07 - Los Angeles\n", + " cc53a0dbf5df90e3009b9cb5d89d80ba\n", + " 2cfdf0e33e9229d6b0ad124d956f5856\n", + " City of Los Angeles\n", + " CE422\n", + " Bus\n", + " 30\n", + " 0.00\n", + " 422305\n", " 11\n", - " \n", - " \n", - " not bunched\n", - " 16\n", - " \n", - " \n", - " 15\n", - " bunched\n", - " 12\n", - " \n", - " \n", - " not bunched\n", - " 17\n", - " \n", - " \n", - " 16\n", - " bunched\n", + " 1\n", " 12\n", + " 91.67\n", + " 8.33\n", " \n", " \n", - " not bunched\n", - " 19\n", - " \n", - " \n", - " 17\n", - " bunched\n", + " 14602\n", + " 07 - Los Angeles\n", + " cc53a0dbf5df90e3009b9cb5d89d80ba\n", + " 2cfdf0e33e9229d6b0ad124d956f5856\n", + " City of Los Angeles\n", + " CE422\n", + " Bus\n", + " 30\n", + " 0.00\n", + " 422306\n", " 11\n", - " \n", - " \n", - " not bunched\n", - " 17\n", - " \n", - " \n", - " 18\n", - " bunched\n", - " 6\n", - " \n", - " \n", - " not bunched\n", - " 14\n", - " \n", - " \n", - " 19\n", - " bunched\n", - " 6\n", - " \n", - " \n", - " not bunched\n", - " 12\n", - " \n", - " \n", - " 20\n", - " bunched\n", - " 6\n", - " \n", - " \n", - " not bunched\n", + " 1\n", " 12\n", + " 91.67\n", + " 8.33\n", " \n", " \n", - " 21\n", - " bunched\n", - " 5\n", - " \n", - " \n", - " not bunched\n", - " 10\n", - " \n", - " \n", - " 22\n", - " bunched\n", - " 3\n", - " \n", - " \n", - " not bunched\n", - " 9\n", - " \n", - " \n", - " 23\n", - " bunched\n", + " 14603\n", + " 07 - Los Angeles\n", + " cc53a0dbf5df90e3009b9cb5d89d80ba\n", + " 2cfdf0e33e9229d6b0ad124d956f5856\n", + " City of Los Angeles\n", + " CE422\n", + " Bus\n", + " 30\n", + " 0.00\n", + " 422309\n", + " 11\n", " 1\n", + " 12\n", + " 91.67\n", + " 8.33\n", " \n", " \n", - " not bunched\n", - " 7\n", + " 14604\n", + " 07 - Los Angeles\n", + " cc53a0dbf5df90e3009b9cb5d89d80ba\n", + " 2cfdf0e33e9229d6b0ad124d956f5856\n", + " City of Los Angeles\n", + " CE422\n", + " Bus\n", + " 30\n", + " 0.00\n", + " 422310\n", + " 11\n", + " 1\n", + " 12\n", + " 91.67\n", + " 8.33\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trip_instance_key\n", - "rt_hour bunched_y_n \n", - "0 bunched 4\n", - " not bunched 8\n", - "4 not bunched 2\n", - "5 bunched 1\n", - " not bunched 7\n", - "6 bunched 7\n", - " not bunched 13\n", - "7 bunched 13\n", - " not bunched 16\n", - "8 bunched 11\n", - " not bunched 16\n", - "9 bunched 12\n", - " not bunched 17\n", - "10 bunched 11\n", - " not bunched 17\n", - "11 bunched 14\n", - " not bunched 18\n", - "12 bunched 14\n", - " not bunched 17\n", - "13 bunched 11\n", - " not bunched 18\n", - "14 bunched 11\n", - " not bunched 16\n", - "15 bunched 12\n", - " not bunched 17\n", - "16 bunched 12\n", - " not bunched 19\n", - "17 bunched 11\n", - " not bunched 17\n", - "18 bunched 6\n", - " not bunched 14\n", - "19 bunched 6\n", - " not bunched 12\n", - "20 bunched 6\n", - " not bunched 12\n", - "21 bunched 5\n", - " not bunched 10\n", - "22 bunched 3\n", - " not bunched 9\n", - "23 bunched 1\n", - " not bunched 7" + " caltrans_district schedule_gtfs_dataset_key \\\n", + "14599 07 - Los Angeles cc53a0dbf5df90e3009b9cb5d89d80ba \n", + "14601 07 - Los Angeles cc53a0dbf5df90e3009b9cb5d89d80ba \n", + "14602 07 - Los Angeles cc53a0dbf5df90e3009b9cb5d89d80ba \n", + "14603 07 - Los Angeles cc53a0dbf5df90e3009b9cb5d89d80ba \n", + "14604 07 - Los Angeles cc53a0dbf5df90e3009b9cb5d89d80ba \n", + "\n", + " feed_key organization_name route_long_name \\\n", + "14599 2cfdf0e33e9229d6b0ad124d956f5856 City of Los Angeles CE422 \n", + "14601 2cfdf0e33e9229d6b0ad124d956f5856 City of Los Angeles CE422 \n", + "14602 2cfdf0e33e9229d6b0ad124d956f5856 City of Los Angeles CE422 \n", + "14603 2cfdf0e33e9229d6b0ad124d956f5856 City of Los Angeles CE422 \n", + "14604 2cfdf0e33e9229d6b0ad124d956f5856 City of Los Angeles CE422 \n", + "\n", + " route_type route_id direction_id stop_id bunched not_bunched \\\n", + "14599 Bus 30 0.00 422300 11 1 \n", + "14601 Bus 30 0.00 422305 11 1 \n", + "14602 Bus 30 0.00 422306 11 1 \n", + "14603 Bus 30 0.00 422309 11 1 \n", + "14604 Bus 30 0.00 422310 11 1 \n", + "\n", + " all_trip_instance_key pct_bunched pct_not_bunched \n", + "14599 12 91.67 8.33 \n", + "14601 12 91.67 8.33 \n", + "14602 12 91.67 8.33 \n", + "14603 12 91.67 8.33 \n", + "14604 12 91.67 8.33 " ] }, - "execution_count": 144, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sf_49.groupby(\n", - " [\n", - " \"rt_hour\",\n", - " \"bunched_y_n\",\n", - " ]\n", - ").agg({\"trip_instance_key\": \"nunique\"})" + "transit_matter_ag.loc[transit_matter_ag.pct_bunched > 90].head()" ] }, { "cell_type": "code", - "execution_count": 152, - "id": "26ab3cba-c001-4aa7-8684-6a45492c6375", - "metadata": { - "scrolled": true, - "tags": [] - }, + "execution_count": 126, + "id": "6c56701b-ccda-45f3-a561-60ed036a19c1", + "metadata": {}, + "outputs": [], + "source": [ + "la_30_og = trips_routes_times4.loc[\n", + " (trips_routes_times4.stop_id == \"422300\")\n", + " & (trips_routes_times4.organization_name == \"City of Los Angeles\")\n", + " & (trips_routes_times4.route_id == \"30\")\n", + " ]\n" + ] + }, + { + "cell_type": "markdown", + "id": "79a7e2fe-6b59-4924-b1b6-7ec8a5965c5a", + "metadata": {}, + "source": [ + "#### Aggregate for `operator-route-direction`: How many stops are bunched?" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "id": "0991636f-9901-48f4-a851-dc502a71201d", + "metadata": {}, + "outputs": [], + "source": [ + "op_route_dir_groupby_cols = [\n", + " \"caltrans_district\",\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"feed_key\",\n", + " \"organization_name\",\n", + " \"route_long_name\",\n", + " \"route_type\",\n", + " \"route_id\",\n", + " \"direction_id\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "id": "6ae48601-c329-43cb-ae1d-b8053d2c1efa", + "metadata": {}, + "outputs": [], + "source": [ + "agg_operator_route_dir1 = aggregation(\n", + " transit_matters_df1,\n", + " op_route_dir_groupby_cols,\n", + " op_route_dir_groupby_cols,\n", + " \"trip_instance_key\",\n", + " \"nunique\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "3bab4524-6b47-4202-bc11-c79e6be630cc", + "metadata": {}, + "outputs": [], + "source": [ + "la_30_test = agg_operator_route_dir1.loc[\n", + " (agg_operator_route_dir1.organization_name == \"City of Los Angeles\")\n", + " & (agg_operator_route_dir1.route_id == \"30\")\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "a84b8e2a-40a5-4592-960c-3f82e2405fa3", + "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], "text/plain": [ - "not bunched 144\n", - "bunched 1\n", - "Name: bunched_y_n, dtype: int64" + "alt.Chart(...)" ] }, - "execution_count": 152, + "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sf_49.loc[sf_49.rt_hour == 23].bunched_y_n.value_counts()" + "la_30_test_m = pd.melt(\n", + " la_30_test,\n", + " id_vars=[\"organization_name\", \"route_id\", \"direction_id\"],\n", + " value_vars=[\"pct_bunched\", \"pct_not_bunched\"],\n", + ").rename(columns={\"value\": \"percentage\"})\n", + "\n", + "alt.Chart(la_30_test_m).mark_bar().encode(\n", + " x=alt.X(\n", + " \"variable:O\",\n", + " axis=alt.Axis(labelAngle=-45),\n", + " ),\n", + " y=alt.Y(\"percentage:Q\", scale=alt.Scale(domain=[0, 100])),\n", + " color=alt.Color(\n", + " \"variable:N\",\n", + " title=\"Bunched or Not\",\n", + " scale=alt.Scale(range=color_dict[\"tri_color\"]),\n", + " ),\n", + " column=\"direction_id:N\",\n", + " tooltip=list(la_30_test_m.columns),\n", + ").properties(\n", + " title={\n", + " \"text\": \"Total Occurences of Bunching for Operator-Route-Direction\",\n", + " },\n", + " width=200,\n", + " height=250,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 154, - "id": "c199172a-17b0-4ded-90ee-7ae55f4e7d9d", + "execution_count": 125, + "id": "182c6867-2fee-44f4-a77e-02ec76b904bf", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
\n", - "\n", + "
\n", + "" ], "text/plain": [ - " trip_instance_key\n", - "bunched_y_n \n", - "bunched 1\n", - "not bunched 7" + "alt.Chart(...)" ] }, - "execution_count": 154, + "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sf_49.loc[sf_49.rt_hour == 23].groupby([\"bunched_y_n\"]).agg(\n", - " {\"trip_instance_key\": \"nunique\"}\n", + "sf_30_test = agg_operator_route_dir1.loc[\n", + " (agg_operator_route_dir1.organization_name == \"City and County of San Francisco\")\n", + " & (agg_operator_route_dir1.route_id == \"30\")\n", + "]\n", + "\n", + "sf_30_test_m = pd.melt(\n", + " sf_30_test,\n", + " id_vars=[\"organization_name\", \"route_id\", \"direction_id\"],\n", + " value_vars=[\"pct_bunched\", \"pct_not_bunched\"],\n", + ").rename(columns={\"value\": \"percentage\"})\n", + "\n", + "alt.Chart(sf_30_test_m).mark_bar().encode(\n", + " x=alt.X(\n", + " \"variable:O\",\n", + " axis=alt.Axis(labelAngle=-45),\n", + " ),\n", + " y=alt.Y(\"percentage:Q\", scale=alt.Scale(domain=[0, 100])),\n", + " color=alt.Color(\n", + " \"variable:N\",\n", + " title=\"Bunched or Not\",\n", + " scale=alt.Scale(range=color_dict[\"tri_color\"]),\n", + " ),\n", + " column=\"direction_id:N\",\n", + " tooltip=list(sf_30_test_m.columns),\n", + ").properties(\n", + " title={\n", + " \"text\": \"Total Occurences of Bunching for Operator-Route-Direction\",\n", + " },\n", + " width=200,\n", + " height=250,\n", ")" ] }, { "cell_type": "code", - "execution_count": 155, - "id": "8cf3d048-1e3a-4402-b85a-73a52727e6b7", + "execution_count": 161, + "id": "d2cb0fa7-6cbb-4817-9451-d99c7da86328", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
\n", - "\n", + "
\n", + "" ], "text/plain": [ - " stop_id\n", - "trip_instance_key bunched_y_n \n", - "2f788ff486bdf8ff8a3435759aebbb2f bunched 1\n", - " not bunched 29\n", - "4332a5a17f95bc642d3ff9c3cc057416 not bunched 13\n", - "6d3e80c5ab0949db88d364fec14ff85e not bunched 25\n", - "86631e2647c3a05b9d876eee470ac5e6 not bunched 8\n", - "aa38dc2d39adf3610f8149b05a900bfc not bunched 15\n", - "c8f5a562a74dcd7d9b138abe4d9a96b2 not bunched 19\n", - "def34654701618d0a4b4eefe240f9914 not bunched 35" + "alt.Chart(...)" ] }, - "execution_count": 155, + "execution_count": 161, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sf_49.loc[sf_49.rt_hour == 23].groupby([\"trip_instance_key\", \"bunched_y_n\"]).agg(\n", - " {\"stop_id\": \"nunique\"}\n", + "sf_49_test = agg_operator_route_dir1.loc[\n", + " (agg_operator_route_dir1.organization_name == \"City and County of San Francisco\")\n", + " & (agg_operator_route_dir1.route_id == \"49\")\n", + "]\n", + "\n", + "sf_49_test_m = pd.melt(\n", + " sf_49_test,\n", + " id_vars=[\"organization_name\", \"route_id\", \"direction_id\"],\n", + " value_vars=[\"pct_bunched\", \"pct_not_bunched\"],\n", + ").rename(columns={\"value\": \"percentage\"})\n", + "\n", + "alt.Chart(sf_49_test_m).mark_bar().encode(\n", + " x=alt.X(\n", + " \"variable:O\",\n", + " axis=alt.Axis(labelAngle=-45),\n", + " ),\n", + " y=alt.Y(\"percentage:Q\", scale=alt.Scale(domain=[0, 100])),\n", + " color=alt.Color(\n", + " \"variable:N\",\n", + " title=\"Bunched or Not\",\n", + " scale=alt.Scale(range=color_dict[\"tri_color\"]),\n", + " ),\n", + " column=\"direction_id:N\",\n", + " tooltip=list(sf_49_test_m.columns),\n", + ").properties(\n", + " title={\n", + " \"text\": \"Total Occurences of Bunching for Operator-Route-Direction\",\n", + " },\n", + " width=200,\n", + " height=250,\n", ")" ] }, + { + "cell_type": "markdown", + "id": "f8da417d-b975-4e66-9a37-64980318ea41", + "metadata": {}, + "source": [ + "#### Aggregate for `operator-route-direction-rt_arrival_hour`\n", + "* Testing a couple of options to see the difference.\n", + "* Do we care how many stops experience bunching or how many trips experience bunching?\n", + "* If one stop for a trip is bunched, the whole trip is considered bunched...Is that too \"draconian\"?\n", + " * Per my conversation with Katie, generally if one stop is bunched, the whole trip is considered bunched." + ] + }, { "cell_type": "code", - "execution_count": 153, - "id": "7fbaafcb-3daa-43ea-8d7a-b090919281e9", + "execution_count": 129, + "id": "aea6be3d-f024-41c0-806c-b675f19697e0", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.006944444444444444" - ] - }, - "execution_count": 153, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "source": [ + "op_route_dir_hour_groupby_cols = [\n", + " \"caltrans_district\",\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"feed_key\",\n", + " \"organization_name\",\n", + " \"route_long_name\",\n", + " \"route_type\",\n", + " \"route_id\",\n", + " \"direction_id\",\n", + " \"rt_hour\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "id": "d999d94d-3199-4333-8c3f-b0de3b458271", + "metadata": {}, + "outputs": [], "source": [ - "1 / 144" + "agg_operator_route_dir_hour = aggregation(\n", + " transit_matters_df1,\n", + " op_route_dir_hour_groupby_cols,\n", + " op_route_dir_hour_groupby_cols,\n", + " \"stop_id\",\n", + " \"nunique\",\n", + ")" ] }, { "cell_type": "code", - "execution_count": 143, - "id": "b3fe356e-b0a0-4fe6-bd61-eaccbfa6991e", + "execution_count": 141, + "id": "70be1dc8-0f7e-43f6-a485-1c21305b9fe2", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.02857142857142857" - ] - }, - "execution_count": 143, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "1 / 35" + "agg_operator_trip_dir_hour2 = aggregation(\n", + " transit_matters_df1,\n", + " op_route_dir_hour_groupby_cols,\n", + " op_route_dir_hour_groupby_cols,\n", + " \"trip_instance_key\",\n", + " \"nunique\",\n", + ")" ] }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 137, "id": "a4c24e52-b061-4ae5-b053-97e5ce175f5d", "metadata": {}, "outputs": [], "source": [ - "one_route49 = agg_operator_route_dir_hour.loc[\n", + "one_route49_stop_bunched = agg_operator_route_dir_hour.loc[\n", " (\n", " agg_operator_route_dir_hour.organization_name\n", " == \"City and County of San Francisco\"\n", @@ -4072,24 +4025,24 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 144, "id": "8d94893f-5766-4a77-a30d-f6661f4f1470", "metadata": {}, "outputs": [], "source": [ - "one_route49_test2 = agg_operator_route_dir_hour2.loc[\n", + "one_route49_test2_trip_bunched = agg_operator_trip_dir_hour2.loc[\n", " (\n", - " agg_operator_route_dir_hour2.organization_name\n", + " agg_operator_trip_dir_hour2.organization_name\n", " == \"City and County of San Francisco\"\n", " )\n", - " & (agg_operator_route_dir_hour2.route_id == \"49\")\n", - " & (agg_operator_route_dir_hour2.direction_id == 1)\n", + " & (agg_operator_trip_dir_hour2.route_id == \"49\")\n", + " & (agg_operator_trip_dir_hour2.direction_id == 1)\n", "]" ] }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 139, "id": "b8169ed2-dac2-491e-92fe-85ed2e61bcd2", "metadata": {}, "outputs": [ @@ -4098,23 +4051,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "