diff --git a/gtfs_utils/airtable_junction/create_junction_table.ipynb b/gtfs_utils/airtable_junction/create_junction_table.ipynb new file mode 100644 index 000000000..39850659b --- /dev/null +++ b/gtfs_utils/airtable_junction/create_junction_table.ipynb @@ -0,0 +1,2231 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Junction Tables for Assessments\n", + "\n", + "The purpose of this notebook is to create junction tables between \"checks\" and the various pieces of information the checks are performed on for the purposes of transit data assessments.\n", + "\n", + "Assessment checks are performed at the following levels:\n", + "\n", + "- Transit Provider (i.e. does the provider list GTFS on its website)\n", + "- GTFS Dataset (i.e. does it list appropriate contact info in `feed_info.txt`)\n", + "- Service representation in a GTFS Dataset (i.e. are the San Francisco cable cars appropriately represented in the SFMTA GTFS Dataset?)\n", + "\n", + "### Output\n", + "In order to make sure that each relevant record is assessed with each relevant \"check\", a cartesian join is made between the records and the checks to form a \"junction table\", which allows us to add attributes about the junction of the record and the check (i.e. a \"grade\" or score).\n", + "\n", + "This notebook will create three delimited files (sep=\"|\") corresponding to each of these levels, which can be appended to the records in the following Airtable tables:\n", + "\n", + " - `data_cross.csv` --> GTFS Dataset Check Data \n", + " - `services_cross.csv` --> gtfs-dataset Check Data \n", + " - `provider_cross.csv` --> Provider Check Data\n", + "\n", + "### Requirements \n", + "\n", + "This notebook makes use of the `pyairtable` library to access the California Transit Airtable Database. 
You will need read access to this database with an API key stored in a .env file in the base directory of this notebook with the following format:\n", + "\n", + "```sh\n", + "AIRTABLE_API_KEY='your_api_key_info'\n", + "```\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1 - Read data from AirTable API" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import dotenv_values\n", + "from pyairtable import Table\n", + "\n", + "API_KEY = dotenv_values(\".env\")['AIRTABLE_API_KEY']\n", + "BASE_ID = 'appjPsudTDcbLUWM5'\n", + "\n", + "GTFS_SERVICES_TABLE = 'Assessed GTFS-Services'\n", + "ASSESSED_SERVICES_TABLE = 'Assessed Services'\n", + "ASSESSED_PROVIDERS_TABLE = 'Assessed Transit Providers'\n", + "CHECKS_TABLE = 'gtfs checks'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [], + "source": [ + "def airtable_to_df(table_id:str,base_id:str=BASE_ID,api_key:str=API_KEY)->pd.DataFrame:\n", + " records = Table(api_key, base_id, table_id)\n", + " airtable_rows = [] \n", + " airtable_index = []\n", + " for record in records.all():\n", + " airtable_rows.append(record[\"fields\"])\n", + " airtable_index.append(record[\"id\"])\n", + " return pd.DataFrame(airtable_rows, index=airtable_index)" + ] + }, + { + "cell_type": "code", + "execution_count": 236, + "metadata": {}, + "outputs": [], + "source": [ + "gtfs_services_df = airtable_to_df(GTFS_SERVICES_TABLE)\n", + "gtfs_checks_df = airtable_to_df(CHECKS_TABLE)\n", + "services_df = airtable_to_df(ASSESSED_SERVICES_TABLE)\n", + "gtfs_providers_df = airtable_to_df(ASSESSED_PROVIDERS_TABLE)" + ] + }, + { + "cell_type": "code", + "execution_count": 237, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameServicesGTFS DatasetDataset TypeCategoryService Type (from Services)ProviderOperatorDataset Producers (from GTFS Dataset)Dataset Publisher (from GTFS Dataset)...Flex Statusagency_idroute_idFares v2 StatusITP Activities (from GTFS Dataset)ITP Schedule TODO (from GTFS Dataset)network_idFares Notes (from GTFS Dataset)Schedule Comments (from GTFS Dataset)gtfs check data
rec05rHUgCVFqsXgNTulare Intermodal Express – Tulare AlertsTulare Intermodal ExpressTulare Alerts[GTFS Alerts]primary[fixed-route]Tulare County Regional Transit AgencyTulare County Regional Transit AgencyGMV Syncromatics IncGMV Syncromatics Inc...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
rec09fVK7ls8fl5spMonterey-Salinas Transit – Monterey Salinas Tr...Monterey-Salinas TransitMonterey Salinas TripUpdates[GTFS TripUpdates]primary[ADA paratransit]Monterey-Salinas TransitMonterey-Salinas TransitMonterey-Salinas TransitNaN...[Needed - Existing GTFS]NaNNaNNaNNaNNaNNaNNaNNaNNaN
rec0WcjVurdKkHx7SHealdsburg Shuttle – Bay Area 511 TripUpdatesHealdsburg ShuttleBay Area 511 TripUpdates[GTFS TripUpdates]primary[fixed-route]Sonoma CountySonoma CountyMetropolitan Transportation CommissionMetropolitan Transportation Commission...NaNSOSO:67NaNNaNNaNNaNNaNNaNNaN
rec0XvgWtnmoUl8LzGlenn Ride – Glenn ScheduleGlenn RideGlenn Schedule[GTFS Schedule]primary[deviated fixed-route, fixed-route, ADA paratr...Glenn CountyParatransit Services Inc.Trillium Inc.Trillium Inc....[In Progress - Trillium]NaNNaN[Vendor published][GTFS Into Google, GTFS Schedule Creation][Train transit provider to own]NaNNaNNaNNaN
rec0YWNONFHEO0pUlValley Express – VCTC AlertsValley ExpressVCTC Alerts[GTFS Alerts]primary[fixed-route]Ventura County Transportation CommissionVentura County Transportation CommissionGMV Syncromatics IncGMV Syncromatics Inc...NaN149NaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
reczSG6bO6il6QMLZHealdsburg Shuttle – Bay Area 511 ScheduleHealdsburg ShuttleBay Area 511 Schedule[GTFS Schedule]primary[fixed-route]Sonoma CountySonoma CountyMetropolitan Transportation CommissionMetropolitan Transportation Commission...NaNSOSO:67[Vendor published]NaNNaNNaNMTCNaNNaN
reczZKKwKsVQuU0dFPlacer County Transit – Placer SchedulePlacer County TransitPlacer Schedule[GTFS Schedule]primary[fixed-route]Placer CountyPlacer CountyPlacer County, Trillium Inc.Trillium Inc....NaNNaNNaN[Vendor published]NaNNaNNaNPublished by TrilliumNaNNaN
reczf9FxlvLopZybbTaft Area Transit – Taft ScheduleTaft Area TransitTaft Schedule[GTFS Schedule]primary[fixed-route]City of TaftCity of TaftTrillium Inc.Trillium Inc....NaNNaNNaN[Needs GTFS Schedule][GTFS Schedule Creation][Get into google, Ask Kern to own]NaNNaNNaNNaN
recziP7dvY7apKbPBKern Transit – Kern ScheduleKern TransitKern Schedule[GTFS Schedule]primary[fixed-route]Kern CountyKern CountyKern County, Trillium Inc.Trillium Inc....NaN194NaN[Vendor published]NaNNaNNaNPublished by TrilliumNaNNaN
reczmEIm3KnSr2jF3Monterey-Salinas Transit – Monterey Salinas Ve...Monterey-Salinas TransitMonterey Salinas VehiclePositions[GTFS VehiclePositions]primary[ADA paratransit]Monterey-Salinas TransitMonterey-Salinas TransitMonterey-Salinas TransitNaN...[Needed - Existing GTFS]NaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

507 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " Name \\\n", + "rec05rHUgCVFqsXgN Tulare Intermodal Express – Tulare Alerts \n", + "rec09fVK7ls8fl5sp Monterey-Salinas Transit – Monterey Salinas Tr... \n", + "rec0WcjVurdKkHx7S Healdsburg Shuttle – Bay Area 511 TripUpdates \n", + "rec0XvgWtnmoUl8Lz Glenn Ride – Glenn Schedule \n", + "rec0YWNONFHEO0pUl Valley Express – VCTC Alerts \n", + "... ... \n", + "reczSG6bO6il6QMLZ Healdsburg Shuttle – Bay Area 511 Schedule \n", + "reczZKKwKsVQuU0dF Placer County Transit – Placer Schedule \n", + "reczf9FxlvLopZybb Taft Area Transit – Taft Schedule \n", + "recziP7dvY7apKbPB Kern Transit – Kern Schedule \n", + "reczmEIm3KnSr2jF3 Monterey-Salinas Transit – Monterey Salinas Ve... \n", + "\n", + " Services \\\n", + "rec05rHUgCVFqsXgN Tulare Intermodal Express \n", + "rec09fVK7ls8fl5sp Monterey-Salinas Transit \n", + "rec0WcjVurdKkHx7S Healdsburg Shuttle \n", + "rec0XvgWtnmoUl8Lz Glenn Ride \n", + "rec0YWNONFHEO0pUl Valley Express \n", + "... ... \n", + "reczSG6bO6il6QMLZ Healdsburg Shuttle \n", + "reczZKKwKsVQuU0dF Placer County Transit \n", + "reczf9FxlvLopZybb Taft Area Transit \n", + "recziP7dvY7apKbPB Kern Transit \n", + "reczmEIm3KnSr2jF3 Monterey-Salinas Transit \n", + "\n", + " GTFS Dataset Dataset Type \\\n", + "rec05rHUgCVFqsXgN Tulare Alerts [GTFS Alerts] \n", + "rec09fVK7ls8fl5sp Monterey Salinas TripUpdates [GTFS TripUpdates] \n", + "rec0WcjVurdKkHx7S Bay Area 511 TripUpdates [GTFS TripUpdates] \n", + "rec0XvgWtnmoUl8Lz Glenn Schedule [GTFS Schedule] \n", + "rec0YWNONFHEO0pUl VCTC Alerts [GTFS Alerts] \n", + "... ... ... 
\n", + "reczSG6bO6il6QMLZ Bay Area 511 Schedule [GTFS Schedule] \n", + "reczZKKwKsVQuU0dF Placer Schedule [GTFS Schedule] \n", + "reczf9FxlvLopZybb Taft Schedule [GTFS Schedule] \n", + "recziP7dvY7apKbPB Kern Schedule [GTFS Schedule] \n", + "reczmEIm3KnSr2jF3 Monterey Salinas VehiclePositions [GTFS VehiclePositions] \n", + "\n", + " Category Service Type (from Services) \\\n", + "rec05rHUgCVFqsXgN primary [fixed-route] \n", + "rec09fVK7ls8fl5sp primary [ADA paratransit] \n", + "rec0WcjVurdKkHx7S primary [fixed-route] \n", + "rec0XvgWtnmoUl8Lz primary [deviated fixed-route, fixed-route, ADA paratr... \n", + "rec0YWNONFHEO0pUl primary [fixed-route] \n", + "... ... ... \n", + "reczSG6bO6il6QMLZ primary [fixed-route] \n", + "reczZKKwKsVQuU0dF primary [fixed-route] \n", + "reczf9FxlvLopZybb primary [fixed-route] \n", + "recziP7dvY7apKbPB primary [fixed-route] \n", + "reczmEIm3KnSr2jF3 primary [ADA paratransit] \n", + "\n", + " Provider \\\n", + "rec05rHUgCVFqsXgN Tulare County Regional Transit Agency \n", + "rec09fVK7ls8fl5sp Monterey-Salinas Transit \n", + "rec0WcjVurdKkHx7S Sonoma County \n", + "rec0XvgWtnmoUl8Lz Glenn County \n", + "rec0YWNONFHEO0pUl Ventura County Transportation Commission \n", + "... ... \n", + "reczSG6bO6il6QMLZ Sonoma County \n", + "reczZKKwKsVQuU0dF Placer County \n", + "reczf9FxlvLopZybb City of Taft \n", + "recziP7dvY7apKbPB Kern County \n", + "reczmEIm3KnSr2jF3 Monterey-Salinas Transit \n", + "\n", + " Operator \\\n", + "rec05rHUgCVFqsXgN Tulare County Regional Transit Agency \n", + "rec09fVK7ls8fl5sp Monterey-Salinas Transit \n", + "rec0WcjVurdKkHx7S Sonoma County \n", + "rec0XvgWtnmoUl8Lz Paratransit Services Inc. \n", + "rec0YWNONFHEO0pUl Ventura County Transportation Commission \n", + "... ... 
\n", + "reczSG6bO6il6QMLZ Sonoma County \n", + "reczZKKwKsVQuU0dF Placer County \n", + "reczf9FxlvLopZybb City of Taft \n", + "recziP7dvY7apKbPB Kern County \n", + "reczmEIm3KnSr2jF3 Monterey-Salinas Transit \n", + "\n", + " Dataset Producers (from GTFS Dataset) \\\n", + "rec05rHUgCVFqsXgN GMV Syncromatics Inc \n", + "rec09fVK7ls8fl5sp Monterey-Salinas Transit \n", + "rec0WcjVurdKkHx7S Metropolitan Transportation Commission \n", + "rec0XvgWtnmoUl8Lz Trillium Inc. \n", + "rec0YWNONFHEO0pUl GMV Syncromatics Inc \n", + "... ... \n", + "reczSG6bO6il6QMLZ Metropolitan Transportation Commission \n", + "reczZKKwKsVQuU0dF Placer County, Trillium Inc. \n", + "reczf9FxlvLopZybb Trillium Inc. \n", + "recziP7dvY7apKbPB Kern County, Trillium Inc. \n", + "reczmEIm3KnSr2jF3 Monterey-Salinas Transit \n", + "\n", + " Dataset Publisher (from GTFS Dataset) ... \\\n", + "rec05rHUgCVFqsXgN GMV Syncromatics Inc ... \n", + "rec09fVK7ls8fl5sp NaN ... \n", + "rec0WcjVurdKkHx7S Metropolitan Transportation Commission ... \n", + "rec0XvgWtnmoUl8Lz Trillium Inc. ... \n", + "rec0YWNONFHEO0pUl GMV Syncromatics Inc ... \n", + "... ... ... \n", + "reczSG6bO6il6QMLZ Metropolitan Transportation Commission ... \n", + "reczZKKwKsVQuU0dF Trillium Inc. ... \n", + "reczf9FxlvLopZybb Trillium Inc. ... \n", + "recziP7dvY7apKbPB Trillium Inc. ... \n", + "reczmEIm3KnSr2jF3 NaN ... \n", + "\n", + " Flex Status agency_id route_id \\\n", + "rec05rHUgCVFqsXgN NaN NaN NaN \n", + "rec09fVK7ls8fl5sp [Needed - Existing GTFS] NaN NaN \n", + "rec0WcjVurdKkHx7S NaN SO SO:67 \n", + "rec0XvgWtnmoUl8Lz [In Progress - Trillium] NaN NaN \n", + "rec0YWNONFHEO0pUl NaN 149 NaN \n", + "... ... ... ... 
\n", + "reczSG6bO6il6QMLZ NaN SO SO:67 \n", + "reczZKKwKsVQuU0dF NaN NaN NaN \n", + "reczf9FxlvLopZybb NaN NaN NaN \n", + "recziP7dvY7apKbPB NaN 194 NaN \n", + "reczmEIm3KnSr2jF3 [Needed - Existing GTFS] NaN NaN \n", + "\n", + " Fares v2 Status \\\n", + "rec05rHUgCVFqsXgN NaN \n", + "rec09fVK7ls8fl5sp NaN \n", + "rec0WcjVurdKkHx7S NaN \n", + "rec0XvgWtnmoUl8Lz [Vendor published] \n", + "rec0YWNONFHEO0pUl NaN \n", + "... ... \n", + "reczSG6bO6il6QMLZ [Vendor published] \n", + "reczZKKwKsVQuU0dF [Vendor published] \n", + "reczf9FxlvLopZybb [Needs GTFS Schedule] \n", + "recziP7dvY7apKbPB [Vendor published] \n", + "reczmEIm3KnSr2jF3 NaN \n", + "\n", + " ITP Activities (from GTFS Dataset) \\\n", + "rec05rHUgCVFqsXgN NaN \n", + "rec09fVK7ls8fl5sp NaN \n", + "rec0WcjVurdKkHx7S NaN \n", + "rec0XvgWtnmoUl8Lz [GTFS Into Google, GTFS Schedule Creation] \n", + "rec0YWNONFHEO0pUl NaN \n", + "... ... \n", + "reczSG6bO6il6QMLZ NaN \n", + "reczZKKwKsVQuU0dF NaN \n", + "reczf9FxlvLopZybb [GTFS Schedule Creation] \n", + "recziP7dvY7apKbPB NaN \n", + "reczmEIm3KnSr2jF3 NaN \n", + "\n", + " ITP Schedule TODO (from GTFS Dataset) network_id \\\n", + "rec05rHUgCVFqsXgN NaN NaN \n", + "rec09fVK7ls8fl5sp NaN NaN \n", + "rec0WcjVurdKkHx7S NaN NaN \n", + "rec0XvgWtnmoUl8Lz [Train transit provider to own] NaN \n", + "rec0YWNONFHEO0pUl NaN NaN \n", + "... ... ... \n", + "reczSG6bO6il6QMLZ NaN NaN \n", + "reczZKKwKsVQuU0dF NaN NaN \n", + "reczf9FxlvLopZybb [Get into google, Ask Kern to own] NaN \n", + "recziP7dvY7apKbPB NaN NaN \n", + "reczmEIm3KnSr2jF3 NaN NaN \n", + "\n", + " Fares Notes (from GTFS Dataset) \\\n", + "rec05rHUgCVFqsXgN NaN \n", + "rec09fVK7ls8fl5sp NaN \n", + "rec0WcjVurdKkHx7S NaN \n", + "rec0XvgWtnmoUl8Lz NaN \n", + "rec0YWNONFHEO0pUl NaN \n", + "... ... 
\n", + "reczSG6bO6il6QMLZ MTC \n", + "reczZKKwKsVQuU0dF Published by Trillium \n", + "reczf9FxlvLopZybb NaN \n", + "recziP7dvY7apKbPB Published by Trillium \n", + "reczmEIm3KnSr2jF3 NaN \n", + "\n", + " Schedule Comments (from GTFS Dataset) gtfs check data \n", + "rec05rHUgCVFqsXgN NaN NaN \n", + "rec09fVK7ls8fl5sp NaN NaN \n", + "rec0WcjVurdKkHx7S NaN NaN \n", + "rec0XvgWtnmoUl8Lz NaN NaN \n", + "rec0YWNONFHEO0pUl NaN NaN \n", + "... ... ... \n", + "reczSG6bO6il6QMLZ NaN NaN \n", + "reczZKKwKsVQuU0dF NaN NaN \n", + "reczf9FxlvLopZybb NaN NaN \n", + "recziP7dvY7apKbPB NaN NaN \n", + "reczmEIm3KnSr2jF3 NaN NaN \n", + "\n", + "[507 rows x 28 columns]" + ] + }, + "execution_count": 237, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gtfs_services_df" + ] + }, + { + "cell_type": "code", + "execution_count": 238, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Check #DescriptionScopeSourceSource: medium-termSource: goalMax ScoreScore Typegtfs-service check data copyScoring Criteriagtfs check datagtfs-dataset check data copy
rec4Ah8sgs8F243XH16TripUpdates PublishedTripUpdates Dataset[human][auto][auto]5.0Nominal[reczJXpgSxkKqzrFg, recAarP7F7LPwtlcn, reczYzu...NaNNaNNaN
rec5GIXSm4aAcpuZj20Publish to OpenMobilityDataTripUpdates Dataset[human][human][human]1.0Boolean[recjjCy0uuLGk6RSq, recXNqjW35Dq3omAX, reclalk...There or not.\\n\\nNaNNaN
rec5ahg0uEPYRoMEX26Publish to TransitLandVehiclePositions Dataset[human][human][human]1.0Boolean[recw95VXxi1w2rmOC, recWbsiIF3zlT2u5d, recuuLV...There or not.\\n\\nNaNNaN
rec6i4tZoKfMLAWiK22Reasonable API Key ProcessTripUpdates Dataset[human][human][human]2.0Nominal[rec5b8Xd4UbUciyH7, recNp2w2UUdwfEB6j, recskZe...NaNNaNNaN
recBWkZKdzqEIDE4613GTFS Grading Scheme ScoreService within GTFS Schedule Dataset[human][gtfs-trained human][human]3.0ContinuousNaN3\\* Grading scheme score / Max grading scheme ...[recuql0JQWFcn7KFs, recxItR7wCdB51vjA, rec4ivy...NaN
\n", + "
" + ], + "text/plain": [ + " Check # Description \\\n", + "rec4Ah8sgs8F243XH 16 TripUpdates Published \n", + "rec5GIXSm4aAcpuZj 20 Publish to OpenMobilityData \n", + "rec5ahg0uEPYRoMEX 26 Publish to TransitLand \n", + "rec6i4tZoKfMLAWiK 22 Reasonable API Key Process \n", + "recBWkZKdzqEIDE46 13 GTFS Grading Scheme Score \n", + "\n", + " Scope Source \\\n", + "rec4Ah8sgs8F243XH TripUpdates Dataset [human] \n", + "rec5GIXSm4aAcpuZj TripUpdates Dataset [human] \n", + "rec5ahg0uEPYRoMEX VehiclePositions Dataset [human] \n", + "rec6i4tZoKfMLAWiK TripUpdates Dataset [human] \n", + "recBWkZKdzqEIDE46 Service within GTFS Schedule Dataset [human] \n", + "\n", + " Source: medium-term Source: goal Max Score Score Type \\\n", + "rec4Ah8sgs8F243XH [auto] [auto] 5.0 Nominal \n", + "rec5GIXSm4aAcpuZj [human] [human] 1.0 Boolean \n", + "rec5ahg0uEPYRoMEX [human] [human] 1.0 Boolean \n", + "rec6i4tZoKfMLAWiK [human] [human] 2.0 Nominal \n", + "recBWkZKdzqEIDE46 [gtfs-trained human] [human] 3.0 Continuous \n", + "\n", + " gtfs-service check data copy \\\n", + "rec4Ah8sgs8F243XH [reczJXpgSxkKqzrFg, recAarP7F7LPwtlcn, reczYzu... \n", + "rec5GIXSm4aAcpuZj [recjjCy0uuLGk6RSq, recXNqjW35Dq3omAX, reclalk... \n", + "rec5ahg0uEPYRoMEX [recw95VXxi1w2rmOC, recWbsiIF3zlT2u5d, recuuLV... \n", + "rec6i4tZoKfMLAWiK [rec5b8Xd4UbUciyH7, recNp2w2UUdwfEB6j, recskZe... \n", + "recBWkZKdzqEIDE46 NaN \n", + "\n", + " Scoring Criteria \\\n", + "rec4Ah8sgs8F243XH NaN \n", + "rec5GIXSm4aAcpuZj There or not.\\n\\n \n", + "rec5ahg0uEPYRoMEX There or not.\\n\\n \n", + "rec6i4tZoKfMLAWiK NaN \n", + "recBWkZKdzqEIDE46 3\\* Grading scheme score / Max grading scheme ... \n", + "\n", + " gtfs check data \\\n", + "rec4Ah8sgs8F243XH NaN \n", + "rec5GIXSm4aAcpuZj NaN \n", + "rec5ahg0uEPYRoMEX NaN \n", + "rec6i4tZoKfMLAWiK NaN \n", + "recBWkZKdzqEIDE46 [recuql0JQWFcn7KFs, recxItR7wCdB51vjA, rec4ivy... 
\n", + "\n", + " gtfs-dataset check data copy \n", + "rec4Ah8sgs8F243XH NaN \n", + "rec5GIXSm4aAcpuZj NaN \n", + "rec5ahg0uEPYRoMEX NaN \n", + "rec6i4tZoKfMLAWiK NaN \n", + "recBWkZKdzqEIDE46 NaN " + ] + }, + "execution_count": 238, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gtfs_checks_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 239, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Nameservice_idProviderOperatorCurrently OperatingModeService TypeNotesFunding SourcesFunding Category...Service Area TypeService Area Buffer (miles)Service Availability CategoryParatransit Fororganization stack componentsITP Schedule TODO (from GTFS Dataset) (from GTFS Services Association)ITP Activities (from GTFS Dataset) (from GTFS Services Association)Schedule Comments (from GTFS Dataset) (from GTFS Services Association)Season StartSeason End
rec0OCKo3fwRLpPIhWatsonville Circulatorrec6lgJrPslFjSXdkSanta Cruz Metropolitan Transit DistrictSanta Cruz Metropolitan Transit DistrictTrue[bus][fixed-route]ETRO’s new Watsonville Circulator Route is des...Caltrans[public]...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
rec0ZVZt8PTzF2zuYTopanga Beach Busrec00Di9RibzcrxHSLos Angeles CountyLos Angeles CountyTrue[bus][fixed-route]Connects Metro Orange Line to Topanga Beach an...Caltrans[public]...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
rec0ikYugqTjhLBkgImperial Valley TransitrecMR1zH6QMLPSriVImperial County Transportation CommissionImperial County Transportation CommissionTrue[bus][fixed-route, deviated fixed-route]NaNCaltrans[public]...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
rec0n7D4vtGfWZVEcCitylinerecaTB1mtROhjUHNRCity of West HollywoodCity of West HollywoodTrue[bus][fixed-route]West Hollywood’s own free shuttle\\n\\nCityline ...Caltrans[public]...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
rec14ZBuXpyw4OLY2Avocado Heights/Bassett/West Valinda ShuttlerecHJivXoZGV1KhZeLos Angeles CountyLos Angeles CountyTrue[bus][fixed-route]The Avocado Heights/Bassett/West Valinda Shutt...Caltrans[public]...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

5 rows × 41 columns

\n", + "
" + ], + "text/plain": [ + " Name \\\n", + "rec0OCKo3fwRLpPIh Watsonville Circulator \n", + "rec0ZVZt8PTzF2zuY Topanga Beach Bus \n", + "rec0ikYugqTjhLBkg Imperial Valley Transit \n", + "rec0n7D4vtGfWZVEc Cityline \n", + "rec14ZBuXpyw4OLY2 Avocado Heights/Bassett/West Valinda Shuttle \n", + "\n", + " service_id \\\n", + "rec0OCKo3fwRLpPIh rec6lgJrPslFjSXdk \n", + "rec0ZVZt8PTzF2zuY rec00Di9RibzcrxHS \n", + "rec0ikYugqTjhLBkg recMR1zH6QMLPSriV \n", + "rec0n7D4vtGfWZVEc recaTB1mtROhjUHNR \n", + "rec14ZBuXpyw4OLY2 recHJivXoZGV1KhZe \n", + "\n", + " Provider \\\n", + "rec0OCKo3fwRLpPIh Santa Cruz Metropolitan Transit District \n", + "rec0ZVZt8PTzF2zuY Los Angeles County \n", + "rec0ikYugqTjhLBkg Imperial County Transportation Commission \n", + "rec0n7D4vtGfWZVEc City of West Hollywood \n", + "rec14ZBuXpyw4OLY2 Los Angeles County \n", + "\n", + " Operator \\\n", + "rec0OCKo3fwRLpPIh Santa Cruz Metropolitan Transit District \n", + "rec0ZVZt8PTzF2zuY Los Angeles County \n", + "rec0ikYugqTjhLBkg Imperial County Transportation Commission \n", + "rec0n7D4vtGfWZVEc City of West Hollywood \n", + "rec14ZBuXpyw4OLY2 Los Angeles County \n", + "\n", + " Currently Operating Mode \\\n", + "rec0OCKo3fwRLpPIh True [bus] \n", + "rec0ZVZt8PTzF2zuY True [bus] \n", + "rec0ikYugqTjhLBkg True [bus] \n", + "rec0n7D4vtGfWZVEc True [bus] \n", + "rec14ZBuXpyw4OLY2 True [bus] \n", + "\n", + " Service Type \\\n", + "rec0OCKo3fwRLpPIh [fixed-route] \n", + "rec0ZVZt8PTzF2zuY [fixed-route] \n", + "rec0ikYugqTjhLBkg [fixed-route, deviated fixed-route] \n", + "rec0n7D4vtGfWZVEc [fixed-route] \n", + "rec14ZBuXpyw4OLY2 [fixed-route] \n", + "\n", + " Notes \\\n", + "rec0OCKo3fwRLpPIh ETRO’s new Watsonville Circulator Route is des... \n", + "rec0ZVZt8PTzF2zuY Connects Metro Orange Line to Topanga Beach an... \n", + "rec0ikYugqTjhLBkg NaN \n", + "rec0n7D4vtGfWZVEc West Hollywood’s own free shuttle\\n\\nCityline ... \n", + "rec14ZBuXpyw4OLY2 The Avocado Heights/Bassett/West Valinda Shutt... 
\n", + "\n", + " Funding Sources Funding Category ... Service Area Type \\\n", + "rec0OCKo3fwRLpPIh Caltrans [public] ... NaN \n", + "rec0ZVZt8PTzF2zuY Caltrans [public] ... NaN \n", + "rec0ikYugqTjhLBkg Caltrans [public] ... NaN \n", + "rec0n7D4vtGfWZVEc Caltrans [public] ... NaN \n", + "rec14ZBuXpyw4OLY2 Caltrans [public] ... NaN \n", + "\n", + " Service Area Buffer (miles) Service Availability Category \\\n", + "rec0OCKo3fwRLpPIh NaN NaN \n", + "rec0ZVZt8PTzF2zuY NaN NaN \n", + "rec0ikYugqTjhLBkg NaN NaN \n", + "rec0n7D4vtGfWZVEc NaN NaN \n", + "rec14ZBuXpyw4OLY2 NaN NaN \n", + "\n", + " Paratransit For organization stack components \\\n", + "rec0OCKo3fwRLpPIh NaN NaN \n", + "rec0ZVZt8PTzF2zuY NaN NaN \n", + "rec0ikYugqTjhLBkg NaN NaN \n", + "rec0n7D4vtGfWZVEc NaN NaN \n", + "rec14ZBuXpyw4OLY2 NaN NaN \n", + "\n", + " ITP Schedule TODO (from GTFS Dataset) (from GTFS Services Association) \\\n", + "rec0OCKo3fwRLpPIh NaN \n", + "rec0ZVZt8PTzF2zuY NaN \n", + "rec0ikYugqTjhLBkg NaN \n", + "rec0n7D4vtGfWZVEc NaN \n", + "rec14ZBuXpyw4OLY2 NaN \n", + "\n", + " ITP Activities (from GTFS Dataset) (from GTFS Services Association) \\\n", + "rec0OCKo3fwRLpPIh NaN \n", + "rec0ZVZt8PTzF2zuY NaN \n", + "rec0ikYugqTjhLBkg NaN \n", + "rec0n7D4vtGfWZVEc NaN \n", + "rec14ZBuXpyw4OLY2 NaN \n", + "\n", + " Schedule Comments (from GTFS Dataset) (from GTFS Services Association) \\\n", + "rec0OCKo3fwRLpPIh NaN \n", + "rec0ZVZt8PTzF2zuY NaN \n", + "rec0ikYugqTjhLBkg NaN \n", + "rec0n7D4vtGfWZVEc NaN \n", + "rec14ZBuXpyw4OLY2 NaN \n", + "\n", + " Season Start Season End \n", + "rec0OCKo3fwRLpPIh NaN NaN \n", + "rec0ZVZt8PTzF2zuY NaN NaN \n", + "rec0ikYugqTjhLBkg NaN NaN \n", + "rec0n7D4vtGfWZVEc NaN NaN \n", + "rec14ZBuXpyw4OLY2 NaN NaN \n", + "\n", + "[5 rows x 41 columns]" + ] + }, + "execution_count": 239, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "services_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAlias'organization_idITP IDTracking Cat - ogReporting Cat - ogAssist Cat - ogCaltrans DistrictMPO/RTPAPlanning Authority...Count of Services with Complete Realtime StatusAt least one GTFS feed for any service (1=yes)At least on complete RT set (1=yes)Complete static GTFS coverage (1=yes)Complete RT coverage (1=yes)provider check dataProvider AssessmentsDetailsWebsiteContracts Held
rec0ZHctuUYh5wtLSSan Luis Obispo Regional Transit Authority[SLORTA]reciakGBN1DP9dK9N289.0ActiveCoreWhite Glove05 - San Luis ObispoSan Luis Obispo Council of GovernmentsSan Luis Obispo Council of Governments...01000[recJpdz8eCeRsUQSj, recK3xMG6XsnQpYwD, recX8ke...[recIUkhYnikNkX12h]NaNNaNNaN
rec0dtZWydv7z5afXCity of Taft[TAT]recgTxL1xyvgC8e9k330.0ActiveCoreWhite Glove06 - FresnoKern Council of GovernmentsKern Council of Governments...01010[rec9P4eMWAQ2GaF5A, recYKDCjjLJbzcJTM, recLVPL...[recp38jhkZVctheDK]NaNNaNNaN
rec0qKtbrBvE1AyReCity of CerritosNaNrecXYM27Lts8XF98x63.0ActiveCoreWhite Glove07 - Los AngelesSouthern California Association of GovernmentsSouthern California Association of Governments...00000[receMU82jZycJkGFe, recwhLFXaVN5DcqQ1, recXbUF...[recJMNAE1xFtv9B60]NaNNaNNaN
rec0yf9AiAqnwpaITCity of DelanoNaNrecROsnN85RlZziSj91.0ActiveCoreWhite Glove06 - FresnoKern Council of GovernmentsKern Council of Governments...01010[recxPm0wCZJj0jtR0, recyeyK9Brn5UV85M, recHjg7...[rec90UiwBWjFXvdt9]NaNNaNNaN
rec1gD38VBhjtMssTImperial County Transportation CommissionNaNrec38PbjPbEy2Tvdu138.0ActiveCoreWhite Glove11 - San DiegoSouthern California Association of GovernmentsSouthern California Association of Governments...00000[recT2TIZ0r3FHdCwQ, recgcfRSpfLffxTDf, recSF6B...[reczToe4t1ynsYs5T]NaNNaNNaN
\n", + "

5 rows × 33 columns

\n", + "
" + ], + "text/plain": [ + " Name Alias' \\\n", + "rec0ZHctuUYh5wtLS San Luis Obispo Regional Transit Authority [SLORTA] \n", + "rec0dtZWydv7z5afX City of Taft [TAT] \n", + "rec0qKtbrBvE1AyRe City of Cerritos NaN \n", + "rec0yf9AiAqnwpaIT City of Delano NaN \n", + "rec1gD38VBhjtMssT Imperial County Transportation Commission NaN \n", + "\n", + " organization_id ITP ID Tracking Cat - og \\\n", + "rec0ZHctuUYh5wtLS reciakGBN1DP9dK9N 289.0 Active \n", + "rec0dtZWydv7z5afX recgTxL1xyvgC8e9k 330.0 Active \n", + "rec0qKtbrBvE1AyRe recXYM27Lts8XF98x 63.0 Active \n", + "rec0yf9AiAqnwpaIT recROsnN85RlZziSj 91.0 Active \n", + "rec1gD38VBhjtMssT rec38PbjPbEy2Tvdu 138.0 Active \n", + "\n", + " Reporting Cat - og Assist Cat - og Caltrans District \\\n", + "rec0ZHctuUYh5wtLS Core White Glove 05 - San Luis Obispo \n", + "rec0dtZWydv7z5afX Core White Glove 06 - Fresno \n", + "rec0qKtbrBvE1AyRe Core White Glove 07 - Los Angeles \n", + "rec0yf9AiAqnwpaIT Core White Glove 06 - Fresno \n", + "rec1gD38VBhjtMssT Core White Glove 11 - San Diego \n", + "\n", + " MPO/RTPA \\\n", + "rec0ZHctuUYh5wtLS San Luis Obispo Council of Governments \n", + "rec0dtZWydv7z5afX Kern Council of Governments \n", + "rec0qKtbrBvE1AyRe Southern California Association of Governments \n", + "rec0yf9AiAqnwpaIT Kern Council of Governments \n", + "rec1gD38VBhjtMssT Southern California Association of Governments \n", + "\n", + " Planning Authority ... \\\n", + "rec0ZHctuUYh5wtLS San Luis Obispo Council of Governments ... \n", + "rec0dtZWydv7z5afX Kern Council of Governments ... \n", + "rec0qKtbrBvE1AyRe Southern California Association of Governments ... \n", + "rec0yf9AiAqnwpaIT Kern Council of Governments ... \n", + "rec1gD38VBhjtMssT Southern California Association of Governments ... 
\n", + "\n", + " Count of Services with Complete Realtime Status \\\n", + "rec0ZHctuUYh5wtLS 0 \n", + "rec0dtZWydv7z5afX 0 \n", + "rec0qKtbrBvE1AyRe 0 \n", + "rec0yf9AiAqnwpaIT 0 \n", + "rec1gD38VBhjtMssT 0 \n", + "\n", + " At least one GTFS feed for any service (1=yes) \\\n", + "rec0ZHctuUYh5wtLS 1 \n", + "rec0dtZWydv7z5afX 1 \n", + "rec0qKtbrBvE1AyRe 0 \n", + "rec0yf9AiAqnwpaIT 1 \n", + "rec1gD38VBhjtMssT 0 \n", + "\n", + " At least on complete RT set (1=yes) \\\n", + "rec0ZHctuUYh5wtLS 0 \n", + "rec0dtZWydv7z5afX 0 \n", + "rec0qKtbrBvE1AyRe 0 \n", + "rec0yf9AiAqnwpaIT 0 \n", + "rec1gD38VBhjtMssT 0 \n", + "\n", + " Complete static GTFS coverage (1=yes) \\\n", + "rec0ZHctuUYh5wtLS 0 \n", + "rec0dtZWydv7z5afX 1 \n", + "rec0qKtbrBvE1AyRe 0 \n", + "rec0yf9AiAqnwpaIT 1 \n", + "rec1gD38VBhjtMssT 0 \n", + "\n", + " Complete RT coverage (1=yes) \\\n", + "rec0ZHctuUYh5wtLS 0 \n", + "rec0dtZWydv7z5afX 0 \n", + "rec0qKtbrBvE1AyRe 0 \n", + "rec0yf9AiAqnwpaIT 0 \n", + "rec1gD38VBhjtMssT 0 \n", + "\n", + " provider check data \\\n", + "rec0ZHctuUYh5wtLS [recJpdz8eCeRsUQSj, recK3xMG6XsnQpYwD, recX8ke... \n", + "rec0dtZWydv7z5afX [rec9P4eMWAQ2GaF5A, recYKDCjjLJbzcJTM, recLVPL... \n", + "rec0qKtbrBvE1AyRe [receMU82jZycJkGFe, recwhLFXaVN5DcqQ1, recXbUF... \n", + "rec0yf9AiAqnwpaIT [recxPm0wCZJj0jtR0, recyeyK9Brn5UV85M, recHjg7... \n", + "rec1gD38VBhjtMssT [recT2TIZ0r3FHdCwQ, recgcfRSpfLffxTDf, recSF6B... 
\n", + "\n", + " Provider Assessments Details Website Contracts Held \n", + "rec0ZHctuUYh5wtLS [recIUkhYnikNkX12h] NaN NaN NaN \n", + "rec0dtZWydv7z5afX [recp38jhkZVctheDK] NaN NaN NaN \n", + "rec0qKtbrBvE1AyRe [recJMNAE1xFtv9B60] NaN NaN NaN \n", + "rec0yf9AiAqnwpaIT [rec90UiwBWjFXvdt9] NaN NaN NaN \n", + "rec1gD38VBhjtMssT [reczToe4t1ynsYs5T] NaN NaN NaN \n", + "\n", + "[5 rows x 33 columns]" + ] + }, + "execution_count": 240, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gtfs_providers_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2 - Create Junction Tables\n", + "\n", + "Airtable doesn't have a good way of auto-generating a junction table (a table associating records from two other tables) based on two other tables and a set of conditions. \n", + "\n", + "The following code creates a \"cartesian product\" (every record to every record) junction table based on exports of two airtable tables and then selects which association records are relevant (i.e. only checks that apply to GTFS Schedule should be associated with GTFS Schedule).\n", + "\n", + "The resulting delimited file can be pasted into a spreadsheet which can then be pasted into Airtable. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Services" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Check #ServicesDataset Type
138232AC TransitGTFS Alerts
24613AC TransitGTFS Schedule
53010AC TransitGTFS Schedule
8148AC TransitGTFS Schedule
10987AC TransitGTFS Schedule
............
24549the Link-Baldwin Hills ParklandsGTFS Schedule
273814the Link-Baldwin Hills ParklandsGTFS Schedule
302211the Link-Baldwin Hills ParklandsGTFS Schedule
217018the Link-Baldwin Hills ParklandsGTFS TripUpdates
188625the Link-Baldwin Hills ParklandsGTFS VehiclePositions
\n", + "

3124 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Check # Services Dataset Type\n", + "1382 32 AC Transit GTFS Alerts\n", + "246 13 AC Transit GTFS Schedule\n", + "530 10 AC Transit GTFS Schedule\n", + "814 8 AC Transit GTFS Schedule\n", + "1098 7 AC Transit GTFS Schedule\n", + "... ... ... ...\n", + "2454 9 the Link-Baldwin Hills Parklands GTFS Schedule\n", + "2738 14 the Link-Baldwin Hills Parklands GTFS Schedule\n", + "3022 11 the Link-Baldwin Hills Parklands GTFS Schedule\n", + "2170 18 the Link-Baldwin Hills Parklands GTFS TripUpdates\n", + "1886 25 the Link-Baldwin Hills Parklands GTFS VehiclePositions\n", + "\n", + "[3124 rows x 3 columns]" + ] + }, + "execution_count": 241, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "services_df = services_df.rename(columns={\"Name\":\"Services\"})\n", + "\n", + "service_checks = [\n", + " \"Service within GTFS Schedule Dataset\",\n", + " \"Service within TripUpdates Dataset\",\n", + " \"Service within VehiclePositions Dataset\",\n", + " \"Service within Alerts Dataset\"\n", + "]\n", + "\n", + "service_checks_df = gtfs_checks_df.loc[gtfs_checks_df[\"Scope\"].isin(service_checks),[\"Check #\",\"Scope\"]]\n", + "\n", + "# cartesian product of service checks and services\n", + "service_checks_cross_df = service_checks_df.merge(services_df[\"Services\"], how='cross')\n", + "service_checks_cross_df = service_checks_cross_df.sort_values([\"Services\",\"Scope\"])\n", + "\n", + "scope_data_mapping = {\n", + " \"Service within GTFS Schedule Dataset\":\"GTFS Schedule\",\n", + " \"Service within TripUpdates Dataset\":\"GTFS TripUpdates\",\n", + " \"Service within VehiclePositions Dataset\":\"GTFS VehiclePositions\",\n", + " \"Service within Alerts Dataset\":\"GTFS Alerts\",\n", + "}\n", + "\n", + "service_checks_cross_df[\"Dataset Type\"] = service_checks_cross_df[\"Scope\"].map(scope_data_mapping)\n", + "\n", + "service_checks_cross_df = service_checks_cross_df.drop(columns=[\"Scope\"])\n", + "\n", + 
"service_checks_cross_df[\"Dataset Type\"].value_counts()\n", + "service_checks_cross_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 242, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs-service recordDataset TypeGTFS DatasetServices
rec05rHUgCVFqsXgNTulare Intermodal Express – Tulare AlertsGTFS AlertsTulare AlertsTulare Intermodal Express
rec09fVK7ls8fl5spMonterey-Salinas Transit – Monterey Salinas Tr...GTFS TripUpdatesMonterey Salinas TripUpdatesMonterey-Salinas Transit
rec0WcjVurdKkHx7SHealdsburg Shuttle – Bay Area 511 TripUpdatesGTFS TripUpdatesBay Area 511 TripUpdatesHealdsburg Shuttle
rec0XvgWtnmoUl8LzGlenn Ride – Glenn ScheduleGTFS ScheduleGlenn ScheduleGlenn Ride
rec0YWNONFHEO0pUlValley Express – VCTC AlertsGTFS AlertsVCTC AlertsValley Express
...............
reczSG6bO6il6QMLZHealdsburg Shuttle – Bay Area 511 ScheduleGTFS ScheduleBay Area 511 ScheduleHealdsburg Shuttle
reczZKKwKsVQuU0dFPlacer County Transit – Placer ScheduleGTFS SchedulePlacer SchedulePlacer County Transit
reczf9FxlvLopZybbTaft Area Transit – Taft ScheduleGTFS ScheduleTaft ScheduleTaft Area Transit
recziP7dvY7apKbPBKern Transit – Kern ScheduleGTFS ScheduleKern ScheduleKern Transit
reczmEIm3KnSr2jF3Monterey-Salinas Transit – Monterey Salinas Ve...GTFS VehiclePositionsMonterey Salinas VehiclePositionsMonterey-Salinas Transit
\n", + "

543 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " gtfs-service record \\\n", + "rec05rHUgCVFqsXgN Tulare Intermodal Express – Tulare Alerts \n", + "rec09fVK7ls8fl5sp Monterey-Salinas Transit – Monterey Salinas Tr... \n", + "rec0WcjVurdKkHx7S Healdsburg Shuttle – Bay Area 511 TripUpdates \n", + "rec0XvgWtnmoUl8Lz Glenn Ride – Glenn Schedule \n", + "rec0YWNONFHEO0pUl Valley Express – VCTC Alerts \n", + "... ... \n", + "reczSG6bO6il6QMLZ Healdsburg Shuttle – Bay Area 511 Schedule \n", + "reczZKKwKsVQuU0dF Placer County Transit – Placer Schedule \n", + "reczf9FxlvLopZybb Taft Area Transit – Taft Schedule \n", + "recziP7dvY7apKbPB Kern Transit – Kern Schedule \n", + "reczmEIm3KnSr2jF3 Monterey-Salinas Transit – Monterey Salinas Ve... \n", + "\n", + " Dataset Type GTFS Dataset \\\n", + "rec05rHUgCVFqsXgN GTFS Alerts Tulare Alerts \n", + "rec09fVK7ls8fl5sp GTFS TripUpdates Monterey Salinas TripUpdates \n", + "rec0WcjVurdKkHx7S GTFS TripUpdates Bay Area 511 TripUpdates \n", + "rec0XvgWtnmoUl8Lz GTFS Schedule Glenn Schedule \n", + "rec0YWNONFHEO0pUl GTFS Alerts VCTC Alerts \n", + "... ... ... \n", + "reczSG6bO6il6QMLZ GTFS Schedule Bay Area 511 Schedule \n", + "reczZKKwKsVQuU0dF GTFS Schedule Placer Schedule \n", + "reczf9FxlvLopZybb GTFS Schedule Taft Schedule \n", + "recziP7dvY7apKbPB GTFS Schedule Kern Schedule \n", + "reczmEIm3KnSr2jF3 GTFS VehiclePositions Monterey Salinas VehiclePositions \n", + "\n", + " Services \n", + "rec05rHUgCVFqsXgN Tulare Intermodal Express \n", + "rec09fVK7ls8fl5sp Monterey-Salinas Transit \n", + "rec0WcjVurdKkHx7S Healdsburg Shuttle \n", + "rec0XvgWtnmoUl8Lz Glenn Ride \n", + "rec0YWNONFHEO0pUl Valley Express \n", + "... ... 
\n", + "reczSG6bO6il6QMLZ Healdsburg Shuttle \n", + "reczZKKwKsVQuU0dF Placer County Transit \n", + "reczf9FxlvLopZybb Taft Area Transit \n", + "recziP7dvY7apKbPB Kern Transit \n", + "reczmEIm3KnSr2jF3 Monterey-Salinas Transit \n", + "\n", + "[543 rows x 4 columns]" + ] + }, + "execution_count": 242, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "services_gtfs_df = gtfs_services_df[[\"Name\",\"Dataset Type\",\"GTFS Dataset\",\"Services\"]].rename(columns={\"Name\":\"gtfs-service record\"})\n", + "\n", + "# make services a list if it isn't already\n", + "services_gtfs_df[\"Services\"]=services_gtfs_df[\"Services\"].apply(lambda x: x.split(\",\"))\n", + "\n", + "# if dataset type is a list, then take first value\n", + "services_gtfs_df[\"Dataset Type\"]=services_gtfs_df[\"Dataset Type\"].apply(lambda x: x[0])\n", + "\n", + "all_services_gtfs_df = services_gtfs_df.explode(\"Services\")\n", + "all_services_gtfs_df[\"Services\"]=all_services_gtfs_df[\"Services\"].apply(lambda x: x.strip())\n", + "\n", + "all_services_gtfs_df" + ] + }, + { + "cell_type": "code", + "execution_count": 252, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Check #gtfs-service recordServices
032NaNAC Transit
113AC Transit – Bay Area 511 ScheduleAC Transit
210AC Transit – Bay Area 511 ScheduleAC Transit
38AC Transit – Bay Area 511 ScheduleAC Transit
47AC Transit – Bay Area 511 ScheduleAC Transit
............
31359the Link-Athens, the Link Florence-Firestone/W...the Link-Baldwin Hills Parklands
313614the Link-Athens, the Link Florence-Firestone/W...the Link-Baldwin Hills Parklands
313711the Link-Athens, the Link Florence-Firestone/W...the Link-Baldwin Hills Parklands
313818NaNthe Link-Baldwin Hills Parklands
313925NaNthe Link-Baldwin Hills Parklands
\n", + "

3140 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Check # gtfs-service record \\\n", + "0 32 NaN \n", + "1 13 AC Transit – Bay Area 511 Schedule \n", + "2 10 AC Transit – Bay Area 511 Schedule \n", + "3 8 AC Transit – Bay Area 511 Schedule \n", + "4 7 AC Transit – Bay Area 511 Schedule \n", + "... ... ... \n", + "3135 9 the Link-Athens, the Link Florence-Firestone/W... \n", + "3136 14 the Link-Athens, the Link Florence-Firestone/W... \n", + "3137 11 the Link-Athens, the Link Florence-Firestone/W... \n", + "3138 18 NaN \n", + "3139 25 NaN \n", + "\n", + " Services \n", + "0 AC Transit \n", + "1 AC Transit \n", + "2 AC Transit \n", + "3 AC Transit \n", + "4 AC Transit \n", + "... ... \n", + "3135 the Link-Baldwin Hills Parklands \n", + "3136 the Link-Baldwin Hills Parklands \n", + "3137 the Link-Baldwin Hills Parklands \n", + "3138 the Link-Baldwin Hills Parklands \n", + "3139 the Link-Baldwin Hills Parklands \n", + "\n", + "[3140 rows x 3 columns]" + ] + }, + "execution_count": 252, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# attach information about gtfs datasets for each service to evaluate\n", + "\n", + "service_checkdata_df = service_checks_cross_df.merge(\n", + " all_services_gtfs_df,\n", + " on=[\"Services\",\"Dataset Type\"],how=\"left\")\n", + "\n", + "\n", + "keep_cols = [\"Check #\",\"gtfs-service record\",\"Services\"]\n", + "keep_service_checkdata_df = service_checkdata_df[keep_cols] #.drop_duplicates()\n", + "keep_service_checkdata_df.to_csv(\"service_cross.csv\",index=False,sep=\"|\")\n", + "#service_checkdata_df.loc[service_checkdata_df[\"gtfs-service record\"].isna()]\n", + "keep_service_checkdata_df\n", + "#service_checkdata_df" + ] + }, + { + "cell_type": "code", + "execution_count": 245, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Check #ServicesDataset Typegtfs-service recordGTFS Dataset
032AC TransitGTFS AlertsNaNNaN
113AC TransitGTFS ScheduleAC Transit – Bay Area 511 ScheduleBay Area 511 Schedule
210AC TransitGTFS ScheduleAC Transit – Bay Area 511 ScheduleBay Area 511 Schedule
38AC TransitGTFS ScheduleAC Transit – Bay Area 511 ScheduleBay Area 511 Schedule
47AC TransitGTFS ScheduleAC Transit – Bay Area 511 ScheduleBay Area 511 Schedule
\n", + "
" + ], + "text/plain": [ + " Check # Services Dataset Type gtfs-service record \\\n", + "0 32 AC Transit GTFS Alerts NaN \n", + "1 13 AC Transit GTFS Schedule AC Transit – Bay Area 511 Schedule \n", + "2 10 AC Transit GTFS Schedule AC Transit – Bay Area 511 Schedule \n", + "3 8 AC Transit GTFS Schedule AC Transit – Bay Area 511 Schedule \n", + "4 7 AC Transit GTFS Schedule AC Transit – Bay Area 511 Schedule \n", + "\n", + " GTFS Dataset \n", + "0 NaN \n", + "1 Bay Area 511 Schedule \n", + "2 Bay Area 511 Schedule \n", + "3 Bay Area 511 Schedule \n", + "4 Bay Area 511 Schedule " + ] + }, + "execution_count": 245, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "service_checkdata_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_checks_df = pd.read_csv(os.path.join(\"data\",\"gtfs checks-Dataset Scope.csv\"), usecols = [\"Check #\",\"Scope\"])\n", + "datasets_df = pd.read_csv(os.path.join(\"data\",\"Assessed Feeds.csv\"), usecols = [\"gtfs_dataset_id\",\"Data\"])\n", + "\n", + "# cartesian product\n", + "dataset_cross_df = datasets_df.merge(dataset_checks_df, how='cross')\n", + "\n", + "# select applicable checks\n", + "dataset_cross_df[\"data_match\"]=dataset_cross_df[\"Data\"]+\" Dataset\"\n", + "dataset_cross_df[\"scope_match\"] = dataset_cross_df[\"Scope\"]\n", + "dataset_cross_df.loc[\n", + " dataset_cross_df[\"Scope\"] != \"GTFS Schedule Dataset\", \n", + " \"scope_match\"\n", + "]=\"GTFS \" + dataset_cross_df[\"Scope\"]\n", + "dataset_checkdata_df = dataset_cross_df[dataset_cross_df[\"data_match\"]==dataset_cross_df[\"scope_match\"]]\n", + "\n", + "#checks \"GTFS Schedule Dataset\"\n", + "#dataset \"GTFS Schedule\"\n", + "\n", + "dataset_checkdata_df[[\"gtfs_dataset_id\",\"Check #\"]].to_csv(\"data_cross.csv\",index=False,sep=\"|\")" + ] + }, + { + "cell_type": 
"code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_idDataCheck #Scopedata_matchscope_match
0Get Around Town Express ScheduleGTFS Schedule1GTFS Schedule DatasetGTFS Schedule DatasetGTFS Schedule Dataset
1Get Around Town Express ScheduleGTFS Schedule2GTFS Schedule DatasetGTFS Schedule DatasetGTFS Schedule Dataset
2Get Around Town Express ScheduleGTFS Schedule3GTFS Schedule DatasetGTFS Schedule DatasetGTFS Schedule Dataset
3Get Around Town Express ScheduleGTFS Schedule4GTFS Schedule DatasetGTFS Schedule DatasetGTFS Schedule Dataset
4Get Around Town Express ScheduleGTFS Schedule5GTFS Schedule DatasetGTFS Schedule DatasetGTFS Schedule Dataset
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_id Data Check # \\\n", + "0 Get Around Town Express Schedule GTFS Schedule 1 \n", + "1 Get Around Town Express Schedule GTFS Schedule 2 \n", + "2 Get Around Town Express Schedule GTFS Schedule 3 \n", + "3 Get Around Town Express Schedule GTFS Schedule 4 \n", + "4 Get Around Town Express Schedule GTFS Schedule 5 \n", + "\n", + " Scope data_match scope_match \n", + "0 GTFS Schedule Dataset GTFS Schedule Dataset GTFS Schedule Dataset \n", + "1 GTFS Schedule Dataset GTFS Schedule Dataset GTFS Schedule Dataset \n", + "2 GTFS Schedule Dataset GTFS Schedule Dataset GTFS Schedule Dataset \n", + "3 GTFS Schedule Dataset GTFS Schedule Dataset GTFS Schedule Dataset \n", + "4 GTFS Schedule Dataset GTFS Schedule Dataset GTFS Schedule Dataset " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset_checkdata_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "provider_checks_df = pd.read_csv(os.path.join(\"data\",\"provider_checks.csv\"))\n", + "providers_df = pd.read_csv(os.path.join(\"data\",\"Assessed Transit Providers-Grid view.csv\"), usecols = [\"Name\"])\n", + "\n", + "# cartesian product\n", + "providers_cross_df = providers_df.merge(provider_checks_df, how='cross')\n", + "\n", + "providers_cross_df.to_csv(\"provider_cross.csv\",index=False,sep=\"|\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Namechecks
0Alameda-Contra Costa Transit District37
1Alameda-Contra Costa Transit District39
2Alameda-Contra Costa Transit District40
3Amador Regional Transit System37
4Amador Regional Transit System39
\n", + "
" + ], + "text/plain": [ + " Name checks\n", + "0 Alameda-Contra Costa Transit District 37\n", + "1 Alameda-Contra Costa Transit District 39\n", + "2 Alameda-Contra Costa Transit District 40\n", + "3 Amador Regional Transit System 37\n", + "4 Amador Regional Transit System 39" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "providers_cross_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + }, + "kernelspec": { + "display_name": "Python 3.9.4 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/transit_riders/hh_df_3way_ct.png b/transit_riders/hh_df_3way_ct.png new file mode 100644 index 000000000..f46e8209d Binary files /dev/null and b/transit_riders/hh_df_3way_ct.png differ diff --git a/transit_riders/hh_df_ct.png b/transit_riders/hh_df_ct.png new file mode 100644 index 000000000..5d52e2537 Binary files /dev/null and b/transit_riders/hh_df_ct.png differ diff --git a/transit_riders/hh_df_ct_r.png b/transit_riders/hh_df_ct_r.png new file mode 100644 index 000000000..3ebf41bc3 Binary files /dev/null and b/transit_riders/hh_df_ct_r.png differ diff --git a/transit_riders/readme.md b/transit_riders/readme.md new file mode 100644 index 000000000..f9b78c624 --- /dev/null +++ b/transit_riders/readme.md @@ -0,0 +1,17 @@ +# Questions about transit riders + +## Available Data + +[2010-2012 California Household Travel 
Survey](https://www.nrel.gov/transportation/secure-transportation-data/tsdc-california-travel-survey.html), hosted at the Transportation Secure Data Center at NREL + +[2017 National Household Travel Survey](https://nhts.ornl.gov/) + +## What % of transit riders have cars, in CA? + +*From:* Gillian Gillett + +*Date:* 2022-03-23 + +[Analysis Notebook](transit_riders_with_autos.ipynb) + +[Results](results.md) diff --git a/transit_riders/results.md b/transit_riders/results.md new file mode 100644 index 000000000..4318dd192 --- /dev/null +++ b/transit_riders/results.md @@ -0,0 +1,35 @@ + + +**Which vehicle-owning households ride transit?** + +| HHold Vehicles | Frequent Transit | Infrequent Transit | No Transit Use | +|:-----------------|-------------------:|---------------------:|-----------------:| +| Has vehicle | 16.3% | 32.3% | 51.4% | +| No Vehicle | 76.5% | 8.5% | 15.0% | +| All | 20.3% | 30.7% | 49.0% | + +![](hh_df_ct.png) + +**Which transit-using households have vehicles?** + +| HHold Transit Use in California (Source: NHTS 2017) | Has vehicle | No Vehicle | +|:------------------------------------------------------|--------------:|-------------:| +| Frequent Transit | 75.2% | 24.8% | +| Infrequent Transit | 98.2% | 1.8% | +| No Transit Use | 98.0% | 2.0% | +| All | 93.4% | 6.6% | + +![](hh_df_ct_r.png) + +**Does it vary among households who are financially burdened by transportation?** + +| | Frequent Transit | Infrequent Transit | No Transit Use | +|:--------------------------------|-------------------:|---------------------:|-----------------:| +| ('Burdened', 'Has vehicle') | 17.7% | 31.0% | 51.3% | +| ('Burdened', 'No Vehicle') | 76.1% | 9.2% | 14.7% | +| ('Not Burdened', 'Has vehicle') | 15.3% | 33.3% | 51.4% | +| ('Not Burdened', 'No Vehicle') | 77.0% | 7.8% | 15.3% | +| ('All', '') | 20.3% | 30.7% | 49.0% | + + +![](hh_df_3way_ct.png) \ No newline at end of file diff --git a/transit_riders/transit_riders_with_autos.ipynb
b/transit_riders/transit_riders_with_autos.ipynb new file mode 100644 index 000000000..a465a9dd5 --- /dev/null +++ b/transit_riders/transit_riders_with_autos.ipynb @@ -0,0 +1,1173 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How Many Transit Riders in California have Autos?\n", + "\n", + "Questioner: Gillian Gillett \n", + "March 23, 2022" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Environment\n", + "\n", + "! Warning: will install libraries into current environment." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:root:test\n" + ] + } + ], + "source": [ + "import logging\n", + "\n", + "logging.basicConfig(level=logging.DEBUG)\n", + "logging.debug(\"test\")" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " import pandas as pd\n", + " import seaborn as sns\n", + " import matplotlib.pyplot as plt\n", + "except:\n", + " logging.info('pandas seaborn not found. Will try and install into current environment')\n", + " ! 
conda install pandas seaborn \n", + " import pandas as pd\n", + " import seaborn as sns\n", + " import matplotlib.pyplot as plt\n", + "\n", + "pd.set_option(\"display.max.columns\", None)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Working directory: /Users/elizabeth/Documents/urbanlabs/CA_Interoperable/working/data-analyses\n" + ] + } + ], + "source": [ + "import os\n", + "WORKING_DIR = os.path.dirname(os.getcwd())\n", + "\n", + "logging.info(f\"Working directory: {WORKING_DIR}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## National Household Travel Survey\n", + "\n", + "Data Codebook: [https://nhts.ornl.gov/tables09/CodebookBrowser.aspx](https://nhts.ornl.gov/tables09/CodebookBrowser.aspx)\n", + "\n", + "Relevant variables:\n", + "\n", + "- HOUSEID\tHousehold Identifier \n", + "- HBHUR\tUrban / Rural indicator - Block group \n", + "- HHFAMINC\tHousehold income \n", + "- HHSIZE\tCount of household members \n", + "- HHSTATE\tHousehold state\n", + "- HHVEHCNT\tCount of Household vehicles\n", + "- WRKCOUNT\tNumber of workers in household\n", + "- WTHHFIN\tFinal HH weight\n", + "\n", + "- CAR\tFrequency of Personal Vehicle Use for Travel\n", + "- BUS\tFrequency of Bus Use for Travel\n", + "- PARA\tFrequency of Paratransit Use for Travel\n", + "- TAXI\tFrequency of Taxi Service or Rideshare Use for Travel\n", + "- [WALK](https://nhts.ornl.gov/tables09/CodebookPage.aspx?id=1365) Frequency of Walk Use for Travel\n", + "- TRAIN\tFrequency of Train Use for Travel\n", + "\n", + "- PLACE\tTravel is a Financial Burden\n", + "- PTRANS\tPublic Transportation to Reduce Financial Burden of Travel\n", + "- WALK2SAVE\tWalk to Reduce Financial Burden of Travel" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import Data\n", + "\n", + "Assumes you have downloaded and unzipped NHTS data and 
weights into `csv` and `ReplicatesCSV` folders respectively." + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:NHTS Data directory: /Users/elizabeth/Documents/urbanlabs/CA_Interoperable/working/NHTS\n" + ] + } + ], + "source": [ + "NHTS_DATA_DIR = os.path.join(os.path.dirname(WORKING_DIR), 'NHTS')\n", + "logging.info(f\"NHTS Data directory: {NHTS_DATA_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HOUSEIDHHSIZEHHVEHCNTHHFAMINCBUSTRAINPLACEHHSTATEWTHHFIN
6300000412211445CA788.614240
930000085129553CA190.669041
1130000094114552CA163.382292
193000015512-7542CA120.772451
2330000227226-9-92CA62.015790
\n", + "
" + ], + "text/plain": [ + " HOUSEID HHSIZE HHVEHCNT HHFAMINC BUS TRAIN PLACE HHSTATE \\\n", + "6 30000041 2 2 11 4 4 5 CA \n", + "9 30000085 1 2 9 5 5 3 CA \n", + "11 30000094 1 1 4 5 5 2 CA \n", + "19 30000155 1 2 -7 5 4 2 CA \n", + "23 30000227 2 2 6 -9 -9 2 CA \n", + "\n", + " WTHHFIN \n", + "6 788.614240 \n", + "9 190.669041 \n", + "11 163.382292 \n", + "19 120.772451 \n", + "23 62.015790 " + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols_to_keep = [\n", + " 'HOUSEID',\n", + " 'HHSIZE',\n", + " 'HHVEHCNT',\n", + " 'HHFAMINC',\n", + " 'BUS',\n", + " 'TRAIN',\n", + " 'WTHHFIN',\n", + " 'PLACE',\n", + " 'HHSTATE',\n", + "]\n", + "\n", + "hh_all_df = pd.read_csv(\n", + " os.path.join(NHTS_DATA_DIR, 'csv','hhpub.csv'),\n", + " usecols=cols_to_keep,\n", + ")\n", + "hh_all_df = hh_all_df[hh_all_df['HHSTATE'] == 'CA']\n", + "\n", + "hh_all_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Analyze NHTS Data\n", + "\n", + "#### Recode Variables" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [], + "source": [ + "def is_transit_user(x):\n", + " _FREQ_TRANSIT = [1,2,3]\n", + " _INFREQ_TRANSIT = [4,5]\n", + "\n", + " # UNKOWN\n", + " if int(x['BUS']) < 0 or int(x['TRAIN']) < 0:\n", + " return \"Unknown\"\n", + " # NO\n", + " if int(x['BUS']) == 5 and int(x['TRAIN']) == 5:\n", + " return \"No Transit Use\"\n", + " # YES\n", + " if int(x['BUS']) in _FREQ_TRANSIT or int(x['TRAIN']) in _FREQ_TRANSIT:\n", + " return \"Frequent Transit\"\n", + " if int(x['BUS']) in _FREQ_TRANSIT+_INFREQ_TRANSIT or int(x['TRAIN']) in _FREQ_TRANSIT+_INFREQ_TRANSIT:\n", + " return \"Infrequent Transit\"\n", + " else:\n", + " logging.debug(f\"Unable to process row for is_transit_user:\\n {x}\")\n", + " raise Exception(f'Unable to determine if transit user for row: {x}')\n", + "\n", + "def has_hh_veh(x):\n", + " # UNKOWN\n", + " if 
int(x['HHVEHCNT']) < 0:\n", + " return \"Unknown\"\n", + " # NO\n", + " if int(x['HHVEHCNT']) == 0:\n", + " return \"No Vehicle\"\n", + " # YES\n", + " if int(x['HHVEHCNT']) > 0:\n", + " return \"Has vehicle\"\n", + " else:\n", + " logging.debug(f\"Unable to process row for has_hh_veh:\\n {x}\")\n", + " raise Exception(f'Unable to determine if household has vehicles for row: {x}')\n", + "\n", + "def travel_burden_hh(x):\n", + " BURDEN = [1,2]\n", + " NOT_BURDEN = [3,4,5]\n", + " # UNKOWN\n", + " if int(x['PLACE']) < 0:\n", + " return \"Unknown\"\n", + " # NO\n", + " if int(x['PLACE']) in NOT_BURDEN:\n", + " return \"Not Burdened\"\n", + " # YES\n", + " if int(x['PLACE']) in BURDEN:\n", + " return \"Burdened\"\n", + " else:\n", + " logging.debug(f\"Unable to process row for travel_burden_hh:\\n {x}\")\n", + " raise Exception(f'Unable to determine if household has financial burden to travel for row: {x}')\n", + " \n", + "def filter_recs(x):\n", + " FILTER_COLS = ['transit_hh','vehicle_hh','burden_hh']\n", + " FILTER_VALUE = \"Unknown\"\n", + " if FILTER_VALUE in [x[c] for c in FILTER_COLS]:\n", + " return -1\n", + " return 1" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DEBUG:root:Excluded records:\n", + " transit_hh vehicle_hh burden_hh\n", + "23 Unknown Has vehicle Burdened\n", + "106 Unknown Has vehicle Unknown\n", + "141 Unknown Has vehicle Not Burdened\n", + "211 Unknown Has vehicle Unknown\n", + "221 Unknown Has vehicle Unknown\n", + "/var/folders/60/xd2kny110pxfz3ln611jq7hm0000gn/T/ipykernel_8795/143102953.py:12: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " hh_df[c]= 
pd.Categorical(\n", + "DEBUG:root:Cleaned records:\n", + " transit_hh vehicle_hh burden_hh\n", + "6 Infrequent Transit Has vehicle Not Burdened\n", + "9 No Transit Use Has vehicle Not Burdened\n", + "11 No Transit Use Has vehicle Burdened\n", + "19 Infrequent Transit Has vehicle Burdened\n", + "37 No Transit Use Has vehicle Burdened\n", + "INFO:root:Cleaned records filtered to exclude 2927(11.2%) of 26099 records\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HOUSEIDHHSIZEHHVEHCNTHHFAMINCBUSTRAINPLACEHHSTATEWTHHFINtransit_hhvehicle_hhburden_hhkeep
6300000412211445CA788.614240Infrequent TransitHas vehicleNot Burdened1
930000085129553CA190.669041No Transit UseHas vehicleNot Burdened1
1130000094114552CA163.382292No Transit UseHas vehicleBurdened1
193000015512-7542CA120.772451Infrequent TransitHas vehicleBurdened1
2330000227226-9-92CA62.015790UnknownHas vehicleBurdened-1
..........................................
12967940794135235553CA63.217848No Transit UseHas vehicleNot Burdened1
12968240794179116552CA377.126813No Transit UseHas vehicleBurdened1
12968540794233238553CA33.421852No Transit UseHas vehicleNot Burdened1
12969140794291119555CA41.869638No Transit UseHas vehicleNot Burdened1
129693407942942210554CA207.672765No Transit UseHas vehicleNot Burdened1
\n", + "

26099 rows × 13 columns

\n", + "
# Recode the raw survey codes into labelled columns, flag rows with any
# "Unknown" answer, and keep the fully-answered rows in `hh_df`.
analysis_cols = ['transit_hh','vehicle_hh','burden_hh']
hh_all_df['transit_hh'] = hh_all_df.apply(is_transit_user, axis=1)
hh_all_df['vehicle_hh'] = hh_all_df.apply(has_hh_veh, axis=1)
hh_all_df['burden_hh'] = hh_all_df.apply(travel_burden_hh, axis=1)
hh_all_df['keep'] = hh_all_df.apply(filter_recs, axis=1)

logging.debug(f"Excluded records:\n{hh_all_df[hh_all_df['keep']<0][analysis_cols].head()}")

# Take an explicit copy: assigning columns on a boolean-mask slice is what
# raised SettingWithCopyWarning, and it leaves it ambiguous whether
# `hh_all_df` is modified too.
hh_df = hh_all_df[hh_all_df['keep']>0].copy()

for c in analysis_cols:
    hh_df[c] = pd.Categorical(
        hh_df[c],
        ordered = True,
    )

logging.debug(f"Cleaned records:\n{hh_df[analysis_cols].head()}")

recs_exc = len(hh_all_df)-len(hh_df)
logging.info(f"Cleaned records filtered to exclude {recs_exc}({round(100*recs_exc/len(hh_all_df),1)}%) of {len(hh_all_df)} records")
hh_all_df
 HHold Transit Use in California (Source: NHTS 2017)Frequent TransitInfrequent TransitNo Transit Use
HHold Transportation BurdenHHold Vehicles   
BurdenedHas vehicle17.7%31.0%51.3%
No Vehicle76.1%9.2%14.7%
Not BurdenedHas vehicle15.3%33.3%51.4%
No Vehicle77.0%7.8%15.3%
All20.3%30.7%49.0%
# Weighted three-way table: rows are (burden, vehicle) groups, columns are
# transit-use categories. Cells sum the WTHHFIN weights and are normalised so
# each row adds to 100%; `margins=True` appends the overall "All" row.
hh_df_3way_ct = pd.crosstab(
    [hh_df['burden_hh'],hh_df['vehicle_hh']],
    hh_df['transit_hh'],
    values = hh_df['WTHHFIN'],
    # String alias instead of the builtin `sum`: same aggregation, but avoids
    # the deprecation warning newer pandas emits for builtin callables.
    aggfunc = "sum",
    normalize='index',
    margins = True,
    rownames=['HHold Transportation Burden','HHold Vehicles'],
    colnames=['HHold Transit Use in California (Source: NHTS 2017)'],
)
s_hh_df_3way_ct=hh_df_3way_ct.style\
    .background_gradient()\
    .format("{:.1%}")

s_hh_df_3way_ct
HHold Transit Use in California (Source: NHTS 2017)Frequent TransitInfrequent TransitNo Transit Use
HHold Vehicles   
Has vehicle16.3%32.3%51.4%
No Vehicle76.5%8.5%15.0%
All20.3%30.7%49.0%
# Weighted table of transit use within each vehicle-ownership group: rows are
# vehicle status, columns are transit-use categories, each row sums to 100%.
hh_df_ct = pd.crosstab(
    hh_df['vehicle_hh'],
    hh_df['transit_hh'],
    values = hh_df['WTHHFIN'],
    # String alias instead of the builtin `sum`: same aggregation, but avoids
    # the deprecation warning newer pandas emits for builtin callables.
    aggfunc = "sum",
    normalize='index',
    margins = True,
    rownames=['HHold Vehicles'],
    colnames=['HHold Transit Use in California (Source: NHTS 2017)'],
)
s_hh_df_ct=hh_df_ct.style\
    .background_gradient()\
    .format("{:.1%}")

s_hh_df_ct
HHold VehiclesHas vehicleNo Vehicle
HHold Transit Use in California (Source: NHTS 2017)  
Frequent Transit75.2%24.8%
Infrequent Transit98.2%1.8%
No Transit Use98.0%2.0%
All93.4%6.6%
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 196, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hh_df_ct_r = pd.crosstab(\n", + " hh_df['transit_hh'], \n", + " hh_df['vehicle_hh'],\n", + " values = hh_df['WTHHFIN'], \n", + " aggfunc = sum,\n", + " normalize='index',\n", + " margins = True,\n", + " colnames=['HHold Vehicles'], \n", + " rownames=['HHold Transit Use in California (Source: NHTS 2017)'],\n", + ")\n", + "s_hh_df_ct_r=hh_df_ct_r.style\\\n", + " .background_gradient()\\\n", + " .format(\"{:.1%}\")\n", + "\n", + "s_hh_df_ct_r" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export Results" + ] + }, + { + "cell_type": "code", + "execution_count": 207, + "metadata": {}, + "outputs": [], + "source": [ + "# Export to markdown\n", + "\n", + "try: \n", + " import tabulate\n", + "except:\n", + " !pip install tabulate\n", + " \n", + "with open(\"results.md\",\"w\") as f:\n", + " f.write(\"\\n\\n**Which transit-using households have vehicles?**\\n\\n\")\n", + " f.write(hh_df_ct.to_markdown(floatfmt=\".1%\"))\n", + " f.write(\"\\n\\n**Which vehicle-owning households ride transit?**\\n\\n\")\n", + " f.write(hh_df_ct_r.to_markdown(floatfmt=\".1%\"))\n", + " f.write(\"\\n\\n**Does it vary among households who are financially burdened by transportation?**\\n\\n\")\n", + " f.write(hh_df_3way_ct.to_markdown(floatfmt=\".1%\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/elizabeth/opt/miniconda3/envs/calitp/lib/python3.10/site-packages/dataframe_image/_pandas_accessor.py:69: FutureWarning: this method is deprecated in favour of `Styler.to_html()`\n", + " html = '
' + obj.render() + '
'\n", + "[0324/132220.507694:INFO:headless_shell.cc(659)] Written to file /var/folders/60/xd2kny110pxfz3ln611jq7hm0000gn/T/tmp2anbgsyh/temp.png.\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'iCCP' 41 295\n", + "DEBUG:PIL.PngImagePlugin:iCCP profile name b'Skia'\n", + "DEBUG:PIL.PngImagePlugin:Compression method 0\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 348 8192\n", + "[0324/132222.411945:INFO:headless_shell.cc(659)] Written to file /var/folders/60/xd2kny110pxfz3ln611jq7hm0000gn/T/tmp3rt1v9vm/temp.png.\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'iCCP' 41 295\n", + "DEBUG:PIL.PngImagePlugin:iCCP profile name b'Skia'\n", + "DEBUG:PIL.PngImagePlugin:Compression method 0\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 348 8192\n", + "[0324/132225.100427:INFO:headless_shell.cc(659)] Written to file /var/folders/60/xd2kny110pxfz3ln611jq7hm0000gn/T/tmppj7306m7/temp.png.\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'iCCP' 41 295\n", + "DEBUG:PIL.PngImagePlugin:iCCP profile name b'Skia'\n", + "DEBUG:PIL.PngImagePlugin:Compression method 0\n", + "DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 348 8192\n" + ] + } + ], + "source": [ + "## Exporting to images\n", + "try:\n", + " import dataframe_image as dfi\n", + "except:\n", + " ! pip install dataframe_image\n", + " import dataframe_image as dfi\n", + "\n", + "dfi.export(s_hh_df_ct,\"hh_df_ct.png\")\n", + "dfi.export(s_hh_df_ct_r,\"hh_df_ct_r.png\")\n", + "dfi.export(s_hh_df_3way_ct,\"hh_df_3way_ct.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot Chord Chart\n", + "# Currently getting an error with this >:(\n", + "\n", + "try:\n", + " from chord import Chord\n", + "except:\n", + " ! 
pip install chord\n", + " from chord import Chord\n", + "\n", + "names=list(hh_df_ct.columns)\n", + "matrix = hh_df_ct.values.tolist()\n", + "ch=Chord(hh_df_ct,names)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqsAAAKaCAYAAAAZPRD5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAOnUlEQVR4nO3cz2vXBRzH8U23scq5tQ5RkSWKipBJSpcounSpyG4FEXXqFnQK+ivqHHTpDwiJqGvUsQg0IshCKCmNJVuusE39duoHZIZfw8/TL4/HbZ/vDq/jkzcfPtOj0WgKAACKtgw9AAAA/o1YBQAgS6wCAJAlVgEAyBKrAABkzVzpx5c/e+7T6zUE+MuDC98MPYExzU1fHHoCwA3pmd2fHL7cc5dVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliF
QCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCArJmhB/D/++DVj+899cmZxfnFuQvPH33qi6H3AACMy2V1Au0/smvlidcfOTH0DgCAayVWJ9A9D925ftPS/IWhdwAAXCuxCgBAllgFACBLrAIAkCVWAQDI8umqCfTeKx/uPP35Twu/nduYeeuxdw4cenH/9wef27cy9C4AgKslVifQk288enLoDQAA/wevAQAAkCVWAQDI8hoABL3/04H//J/Hbzt+HZZwtTZGW4eeADBRXFYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAI
EusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMgSqwAAZIlVAACyxCoAAFliFQCALLEKAECWWAUAIEusAgCQNXOlHz96+/D12gH8zf5nvxx6AgAkuKwCAJB1xcsqN6bVE8e2//DxuzumRqOppX2HVu546InTQ28CABiHy+qEGV26OPXDR0d37Hz6pa/2vvDaF2tfH1/+9cfv5ofeBQAwDrE6YdZPfXPL7MLyb/PLt29smZkdLe6+7+zaiWNLQ+8CABiHWJ0wm+dW52a3LW788ffstqWNzfWf54bcBAAwLrE6cUb/eDI9fZmHAAA3ALE6YWYXbt3YXF/785K6ub46N3PL9s0hNwEAjEusTphtd+36ZfPc2fnzZ8/MXbqwOb329efLi7vvXx16FwDAOHy6asJMb906dcfDR749efTNPVOjS1NLex9Yufn2u88PvQsAYBxidQIt7Tm4trTn4NrQOwAArpXXAAAAyBKrAABkTY9GvmoEAECTyyoAAFliFQCALLEKAECWWAUAIEusAgCQJVYBAMj6HVc/g0/PmESWAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Plot Tree Map\n", + "# This is really ugly >:(\n", + "try:\n", + " import squarify \n", + "except:\n", + " ! pip install squarify \n", + " import squarify \n", + "\n", + "matrix = hh_df_ct.values.tolist()\n", + "values = [item for sublist in matrix for item in sublist]\n", + "\n", + "names=list(hh_df_ct.columns)\n", + "\n", + "fig, ax = plt.subplots(1, figsize = (12,12))\n", + "squarify.plot(sizes=values, \n", + " label=names, \n", + " alpha=.8 )\n", + "plt.axis('off')\n", + "plt.show()" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.2" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}