diff --git a/sb125_analyses/quick_mts_jobs/quick_mts_jobs.ipynb b/sb125_analyses/quick_mts_jobs/quick_mts_jobs.ipynb new file mode 100644 index 000000000..1529cd867 --- /dev/null +++ b/sb125_analyses/quick_mts_jobs/quick_mts_jobs.ipynb @@ -0,0 +1,1511 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ab7b00dc-ad4c-4a56-b9dc-9075c1fa8bba", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "from siuba import *\n", + "\n", + "HQTA_PATH = 'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/export/2024-09-18/ca_hq_transit_areas.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4ba99b41-e7c9-4fbb-83fe-95bda573d41d", + "metadata": {}, + "outputs": [], + "source": [ + "hqta = gpd.read_parquet(HQTA_PATH)" + ] + }, + { + "cell_type": "markdown", + "id": "91f5ac1e-dc4d-447c-9aa1-4a58fb822ff3", + "metadata": {}, + "source": [ + "# Quick counts of people and jobs within ~0.5mi of a Major Transit Stop\n", + "\n", + "Just wrapped this up; two CSVs are attached. One summarizes jobs and population within a half mile of each type of Major Transit Stop per PRC 21155 and PRC 21064.3; the other additionally breaks those figures out by transit agency.\n", + "\n", + "Used 2020 Census LEHD Workplace Area Characteristics to get all primary jobs by Census block (note: the file read below, `ca_wac_S000_JT01_2021`, is labeled 2021, so confirm which year was intended); population by block is from the 2020 decennial Census. 
The spatial join was a simple intersection of each half-mile buffer around a major transit stop with all Census blocks; jobs and people were then summed across those blocks to get a single figure per major stop. Note that a block that only partially overlaps a buffer is counted in full, and because the population and jobs tables are inner-joined, blocks with no record in the jobs file are excluded from both totals.\n" + ] + }, + { + "cell_type": "markdown", + "id": "cbb60432-979b-4727-b7d6-392d5ef393bb", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Read and clean Census data (already done; output is on GCS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2dfaa4c9-fcc4-44e4-b2e8-af087d6b9b4d", + "metadata": {}, + "outputs": [], + "source": [ + "import gzip\n", + "# all workers, all primary jobs\n", + "with gzip.open('./quick_mts_jobs/ca_wac_S000_JT01_2021.csv.gz', 'rb') as f:\n", + " df = pd.read_csv(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a33d692-09ac-4851-bf2a-642f945ea826", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.iloc[:, :2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "617ca48d-35d2-4da9-bd27-905399ed6b14", + "metadata": {}, + "outputs": [], + "source": [ + "import zipfile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f275434d-8a94-4c87-9057-7404a3e2dddc", + "metadata": {}, + "outputs": [], + "source": [ + "with zipfile.ZipFile('./quick_mts_jobs/DECENNIALPL2020.P1_2024-10-16T105517.zip', 'r') as myzip:\n", + " with myzip.open('DECENNIALPL2020.P1-Data.csv') as f:\n", + " df2 = pd.read_csv(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b9e9f90-d997-4ddc-851e-55f9e94efde3", + "metadata": {}, + "outputs": [], + "source": [ + "df2 = df2.iloc[1:,:3]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e74b3f9-1633-4646-bfda-c535f1074421", + "metadata": {}, + "outputs": [], + "source": [ + "df2.GEO_ID = df2.GEO_ID.map(lambda x: x[10:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df3bf725-177b-48eb-87d7-7d3cd7313ad7", + 
"metadata": {}, + "outputs": [], + "source": [ + "df2.GEO_ID = df2.GEO_ID.astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb52c09c-f024-4118-85d4-3122485dc0d7", + "metadata": {}, + "outputs": [], + "source": [ + "df2.P1_001N = df2.P1_001N.astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff8d931c-8dd2-48ad-ab72-d27a216146c8", + "metadata": {}, + "outputs": [], + "source": [ + "df2 >> head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "354459c3-9be8-47db-bb4e-7878cbae5e15", + "metadata": {}, + "outputs": [], + "source": [ + "df2.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31d44c92-2be5-4851-91ba-71d37a1017ee", + "metadata": {}, + "outputs": [], + "source": [ + "df >> head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3377ae1-6be9-468d-a765-578cdf688a94", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cef7a4bd-d13a-4387-9f1a-8f5faecbb547", + "metadata": {}, + "outputs": [], + "source": [ + "joined = df2 >> inner_join(_, df, on={'GEO_ID':'w_geocode'})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c1c0e5a-7f70-43d5-ae05-cd0fe334ae4b", + "metadata": {}, + "outputs": [], + "source": [ + "joined" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07d88a37-eabe-4052-a223-7e41f9c68b3f", + "metadata": {}, + "outputs": [], + "source": [ + "hqta >> head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12463b53-68e3-4908-86d3-29cc3f3ad6e3", + "metadata": {}, + "outputs": [], + "source": [ + "blocks = gpd.read_file('./quick_mts_jobs/tl_2020_06_tabblock20.zip')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9251e399-0c2d-4de3-a35f-da563d95bdb5", + "metadata": {}, + "outputs": [], + "source": [ + "blocks = blocks 
>> select(_.GEOID20, _.POP20, _.geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68ab1dd0-f08c-4431-9ba9-6abd09af35cf", + "metadata": {}, + "outputs": [], + "source": [ + "blocks.GEOID20 = blocks.GEOID20.astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e695d25-5850-4517-911c-70155efd6efc", + "metadata": {}, + "outputs": [], + "source": [ + "blocks = blocks >> rename(GEO_ID = _.GEOID20)  # the select above kept GEOID20; this frame has no GEOID column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f807f1ff-7716-4942-b719-1858dd821b52", + "metadata": {}, + "outputs": [], + "source": [ + "joined = blocks >> inner_join(_, joined, on='GEO_ID')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1647e255-f72a-4fb5-b209-f90850ad00cf", + "metadata": {}, + "outputs": [], + "source": [ + "joined" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "389cd2e2-9141-4944-842d-472efa097d89", + "metadata": {}, + "outputs": [], + "source": [ + "joined = joined >> select(_.geoid == _.GEO_ID, _.pop == _.POP20, _.jobs == _.C000, _.geometry)  # siuba select: _.new == _.old renames while selecting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f5c9d53-17f1-43b0-871d-4f6d3d443915", + "metadata": {}, + "outputs": [], + "source": [ + "joined.to_parquet('census_jobs_blocks.parquet')  # written to the working directory; the upload to the GCS path read below is not part of this notebook" + ] + }, + { + "cell_type": "markdown", + "id": "8c99bcc6-7ebd-445b-ba6b-3f723f02548a", + "metadata": { + "tags": [] + }, + "source": [ + "## Spatial Join (start here)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "e2c29312-9548-45f0-bfc8-ff4e96b76402", + "metadata": {}, + "outputs": [], + "source": [ + "processed_census_path = 'gs://calitp-analytics-data/data-analyses/sb125/quick_mts_jobs/census_jobs_blocks.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "83a114dc-7c0c-499a-8dd2-d8fc22598bf7", + "metadata": {}, + "outputs": [], + "source": [ + "census = gpd.read_parquet(processed_census_path).rename(columns={'pop': 
'people'})" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "8b7f2f62-b63a-4ca4-95b4-fddd169b4cb6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geoidpeoplejobsgeometry
0606504061120041654POLYGON ((-117.59180 33.97054, -117.59059 33.9...
1611301010320110503POLYGON ((-121.53676 38.58855, -121.53421 38.5...
261130115001002251POLYGON ((-122.25881 38.90023, -122.25799 38.8...
\n", + "
" + ], + "text/plain": [ + " geoid people jobs \\\n", + "0 60650406112004 165 4 \n", + "1 61130101032011 0 503 \n", + "2 61130115001002 25 1 \n", + "\n", + " geometry \n", + "0 POLYGON ((-117.59180 33.97054, -117.59059 33.9... \n", + "1 POLYGON ((-121.53676 38.58855, -121.53421 38.5... \n", + "2 POLYGON ((-122.25881 38.90023, -122.25799 38.8... " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "census >> head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "aaabeb5d-80f2-4c58-ba37-e6e580792013", + "metadata": {}, + "outputs": [], + "source": [ + "hqta = hqta >> select(_.agency_primary, _.hqta_type, _.geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "d945c5b5-b836-44e2-9a18-5a1fce9e377e", + "metadata": {}, + "outputs": [], + "source": [ + "hqta = hqta.reset_index() # might actually have use for an ix" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "3618e30f-e55c-452d-b73b-152e85a31261", + "metadata": {}, + "outputs": [], + "source": [ + "from calitp_data_analysis import geography_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "8cab1c45-9271-468c-a926-4a5641a95c70", + "metadata": {}, + "outputs": [], + "source": [ + "hqta = hqta.to_crs(geography_utils.CA_NAD83Albers)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "18d6c363-0373-4692-bba4-7a56b078ef49", + "metadata": {}, + "outputs": [], + "source": [ + "census = census.to_crs(geography_utils.CA_NAD83Albers)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "f267f605-034e-4bb7-983f-5112dab47bb0", + "metadata": {}, + "outputs": [], + "source": [ + "sjoined = hqta.sjoin(census, how=\"inner\", predicate='intersects')" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "3775a378-f75c-4a38-8eaa-3443399e9694", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexagency_primaryhqta_typegeometryindex_rightgeoidpeoplejobs
00Alameda-Contra Costa Transit Districthq_corridor_busMULTIPOLYGON (((-184637.774 -36487.978, -18464...125710600143660120015238
1717Alameda-Contra Costa Transit Districthq_corridor_busMULTIPOLYGON (((-184632.513 -36521.325, -18463...125710600143660120015238
2020Alameda-Contra Costa Transit Districthq_corridor_busMULTIPOLYGON (((-184662.574 -36958.651, -18469...125710600143660120015238
2626Alameda-Contra Costa Transit Districthq_corridor_busMULTIPOLYGON (((-183127.422 -40597.359, -18313...125710600143660120015238
3333Alameda-Contra Costa Transit Districthq_corridor_busMULTIPOLYGON (((-183386.671 -44394.580, -18338...125710600143660120015238
...........................
4913849138Sonoma-Marin Area Rail Transit Districtmajor_stop_railPOLYGON ((-241654.223 58414.285, -241658.099 5...79398609715270230050560
4913749137Sonoma-Marin Area Rail Transit Districtmajor_stop_railPOLYGON ((-241654.554 58414.730, -241658.430 5...21762960971527024015019
4913849138Sonoma-Marin Area Rail Transit Districtmajor_stop_railPOLYGON ((-241654.223 58414.285, -241658.099 5...21762960971527024015019
4913749137Sonoma-Marin Area Rail Transit Districtmajor_stop_railPOLYGON ((-241654.554 58414.730, -241658.430 5...232450609715270230000549
4913849138Sonoma-Marin Area Rail Transit Districtmajor_stop_railPOLYGON ((-241654.223 58414.285, -241658.099 5...232450609715270230000549
\n", + "

4478470 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " index agency_primary hqta_type \\\n", + "0 0 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "17 17 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "20 20 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "26 26 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "33 33 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "... ... ... ... \n", + "49138 49138 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "49137 49137 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "49138 49138 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "49137 49137 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "49138 49138 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "\n", + " geometry index_right \\\n", + "0 MULTIPOLYGON (((-184637.774 -36487.978, -18464... 125710 \n", + "17 MULTIPOLYGON (((-184632.513 -36521.325, -18463... 125710 \n", + "20 MULTIPOLYGON (((-184662.574 -36958.651, -18469... 125710 \n", + "26 MULTIPOLYGON (((-183127.422 -40597.359, -18313... 125710 \n", + "33 MULTIPOLYGON (((-183386.671 -44394.580, -18338... 125710 \n", + "... ... ... \n", + "49138 POLYGON ((-241654.223 58414.285, -241658.099 5... 79398 \n", + "49137 POLYGON ((-241654.554 58414.730, -241658.430 5... 217629 \n", + "49138 POLYGON ((-241654.223 58414.285, -241658.099 5... 217629 \n", + "49137 POLYGON ((-241654.554 58414.730, -241658.430 5... 232450 \n", + "49138 POLYGON ((-241654.223 58414.285, -241658.099 5... 232450 \n", + "\n", + " geoid people jobs \n", + "0 60014366012001 523 8 \n", + "17 60014366012001 523 8 \n", + "20 60014366012001 523 8 \n", + "26 60014366012001 523 8 \n", + "33 60014366012001 523 8 \n", + "... ... ... ... 
\n", + "49138 60971527023005 0 560 \n", + "49137 60971527024015 0 19 \n", + "49138 60971527024015 0 19 \n", + "49137 60971527023000 0 549 \n", + "49138 60971527023000 0 549 \n", + "\n", + "[4478470 rows x 8 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sjoined" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "624a8b6d-12e6-4c06-8c5f-e56176002f71", + "metadata": {}, + "outputs": [], + "source": [ + "sjoined = sjoined >> select(-_.geometry, -_.index_right)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e5ef48a1-006f-43af-8557-96f26e4e4a44", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexagency_primaryhqta_typegeoidpeoplejobs
00Alameda-Contra Costa Transit Districthq_corridor_bus600143660120015238
1717Alameda-Contra Costa Transit Districthq_corridor_bus600143660120015238
2020Alameda-Contra Costa Transit Districthq_corridor_bus600143660120015238
2626Alameda-Contra Costa Transit Districthq_corridor_bus600143660120015238
3333Alameda-Contra Costa Transit Districthq_corridor_bus600143660120015238
.....................
4913849138Sonoma-Marin Area Rail Transit Districtmajor_stop_rail609715270230050560
4913749137Sonoma-Marin Area Rail Transit Districtmajor_stop_rail60971527024015019
4913849138Sonoma-Marin Area Rail Transit Districtmajor_stop_rail60971527024015019
4913749137Sonoma-Marin Area Rail Transit Districtmajor_stop_rail609715270230000549
4913849138Sonoma-Marin Area Rail Transit Districtmajor_stop_rail609715270230000549
\n", + "

4478470 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " index agency_primary hqta_type \\\n", + "0 0 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "17 17 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "20 20 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "26 26 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "33 33 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "... ... ... ... \n", + "49138 49138 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "49137 49137 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "49138 49138 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "49137 49137 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "49138 49138 Sonoma-Marin Area Rail Transit District major_stop_rail \n", + "\n", + " geoid people jobs \n", + "0 60014366012001 523 8 \n", + "17 60014366012001 523 8 \n", + "20 60014366012001 523 8 \n", + "26 60014366012001 523 8 \n", + "33 60014366012001 523 8 \n", + "... ... ... ... \n", + "49138 60971527023005 0 560 \n", + "49137 60971527024015 0 19 \n", + "49138 60971527024015 0 19 \n", + "49137 60971527023000 0 549 \n", + "49138 60971527023000 0 549 \n", + "\n", + "[4478470 rows x 6 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sjoined" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "021be2a9-f51d-48ff-81d8-950eccbe722d", + "metadata": {}, + "outputs": [], + "source": [ + "stop_grouped = sjoined >> group_by(_.index, _.agency_primary, _.hqta_type) >> summarize(total_jobs = _.jobs.sum(), total_people = _.people.sum(), n = _.shape[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "88d408e3-3dba-4f1f-a606-bdb46f9fabef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexagency_primaryhqta_typetotal_jobstotal_peoplen
00Alameda-Contra Costa Transit Districthq_corridor_bus2275175998498
11Alameda-Contra Costa Transit Districthq_corridor_bus1371481531071406
22Alameda-Contra Costa Transit Districthq_corridor_bus924571546591241
33Alameda-Contra Costa Transit Districthq_corridor_bus1365751420081365
44Alameda-Contra Costa Transit Districthq_corridor_bus8630064626606
.....................
4922249226Southern California Regional Rail Authoritymajor_stop_rail5270978666
4922349227Southern California Regional Rail Authoritymajor_stop_rail77625839117
4922449228Southern California Regional Rail Authoritymajor_stop_rail6638403747
4922549229Southern California Regional Rail Authoritymajor_stop_rail7391319029
4922649230Southern California Regional Rail Authoritymajor_stop_rail3845395736
\n", + "

49227 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " index agency_primary hqta_type \\\n", + "0 0 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "1 1 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "2 2 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "3 3 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "4 4 Alameda-Contra Costa Transit District hq_corridor_bus \n", + "... ... ... ... \n", + "49222 49226 Southern California Regional Rail Authority major_stop_rail \n", + "49223 49227 Southern California Regional Rail Authority major_stop_rail \n", + "49224 49228 Southern California Regional Rail Authority major_stop_rail \n", + "49225 49229 Southern California Regional Rail Authority major_stop_rail \n", + "49226 49230 Southern California Regional Rail Authority major_stop_rail \n", + "\n", + " total_jobs total_people n \n", + "0 22751 75998 498 \n", + "1 137148 153107 1406 \n", + "2 92457 154659 1241 \n", + "3 136575 142008 1365 \n", + "4 86300 64626 606 \n", + "... ... ... ... \n", + "49222 5270 9786 66 \n", + "49223 7762 5839 117 \n", + "49224 6638 4037 47 \n", + "49225 7391 3190 29 \n", + "49226 3845 3957 36 \n", + "\n", + "[49227 rows x 6 columns]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_grouped" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "17791638-a6a9-4c40-80bc-0fa1cd9315d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hqta_typep5_popp20_popp50_popp80_popp95_popp5_jobsp20_jobsp50_jobsp80_jobsp95_jobs
0hq_corridor_bus4156.008735.023452.073206.0181390.752480.56688.015961.049293.0188260.0
1major_stop_brt7200.9514900.623434.535540.058089.401308.43961.412188.580472.4204147.0
2major_stop_bus2505.005479.09829.016966.027301.001028.02119.04878.014676.077209.0
3major_stop_ferry154.002842.86523.010109.811707.00602.01679.014020.0123204.4134539.8
4major_stop_rail2227.706191.011971.025451.656594.001377.73110.88994.045033.0217600.0
\n", + "
" + ], + "text/plain": [ + " hqta_type p5_pop p20_pop p50_pop p80_pop p95_pop p5_jobs \\\n", + "0 hq_corridor_bus 4156.00 8735.0 23452.0 73206.0 181390.75 2480.5 \n", + "1 major_stop_brt 7200.95 14900.6 23434.5 35540.0 58089.40 1308.4 \n", + "2 major_stop_bus 2505.00 5479.0 9829.0 16966.0 27301.00 1028.0 \n", + "3 major_stop_ferry 154.00 2842.8 6523.0 10109.8 11707.00 602.0 \n", + "4 major_stop_rail 2227.70 6191.0 11971.0 25451.6 56594.00 1377.7 \n", + "\n", + " p20_jobs p50_jobs p80_jobs p95_jobs \n", + "0 6688.0 15961.0 49293.0 188260.0 \n", + "1 3961.4 12188.5 80472.4 204147.0 \n", + "2 2119.0 4878.0 14676.0 77209.0 \n", + "3 1679.0 14020.0 123204.4 134539.8 \n", + "4 3110.8 8994.0 45033.0 217600.0 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_type = stop_grouped >> group_by(_.hqta_type) >> summarize(p5_pop = _.total_people.quantile(.05), p20_pop = _.total_people.quantile(.2),\n", + " p50_pop = _.total_people.quantile(.5), \n", + " p80_pop = _.total_people.quantile(.8), p95_pop = _.total_people.quantile(.95),\n", + " p5_jobs = _.total_jobs.quantile(.05),\n", + " p20_jobs = _.total_jobs.quantile(.2), p50_jobs = _.total_jobs.quantile(.5),\n", + " p80_jobs = _.total_jobs.quantile(.8), p95_jobs = _.total_jobs.quantile(.95)\n", + " \n", + " )\n", + "by_type" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "faf99aef-67e3-48cc-9ccc-f89b9b0fedff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hqta_typeagency_primaryp5_popp20_popp50_popp80_popp95_popp5_jobsp20_jobsp50_jobsp80_jobsp95_jobs
0hq_corridor_busAlameda-Contra Costa Transit District18607.0032510.057999.0112739.0165692.003402.0010678.019717.082938.0136575.00
1hq_corridor_busAnaheim Transportation Network4435.108165.411509.013089.614306.4011143.4011402.612986.014761.616697.30
2hq_corridor_busAntelope Valley Transit Authority5615.7014756.825210.042965.254929.401483.404884.610790.019423.8123508.40
3hq_corridor_busBasin Transit1407.902293.22735.02999.04756.90736.50839.61236.01504.02041.60
4hq_corridor_busButte County Association of Governments5120.505511.06705.58930.214957.255126.555839.27259.010440.012140.80
.......................................
283major_stop_railSan Francisco International Airport0.000.0621.02451.63062.707570.008396.68801.08914.010498.40
284major_stop_railSan Joaquin Regional Rail Commission922.803344.64963.07300.812224.351329.452163.24626.012453.216297.30
285major_stop_railSanta Clara Valley Transportation Authority372.004056.69368.512127.013964.051451.303746.415232.528537.433144.45
286major_stop_railSonoma-Marin Area Rail Transit District1946.204546.06977.59425.011176.251362.652498.04282.06514.07734.95
287major_stop_railSouthern California Regional Rail Authority82.553029.65565.59434.211780.00854.952498.86117.011140.434857.00
\n", + "

288 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " hqta_type agency_primary p5_pop \\\n", + "0 hq_corridor_bus Alameda-Contra Costa Transit District 18607.00 \n", + "1 hq_corridor_bus Anaheim Transportation Network 4435.10 \n", + "2 hq_corridor_bus Antelope Valley Transit Authority 5615.70 \n", + "3 hq_corridor_bus Basin Transit 1407.90 \n", + "4 hq_corridor_bus Butte County Association of Governments 5120.50 \n", + ".. ... ... ... \n", + "283 major_stop_rail San Francisco International Airport 0.00 \n", + "284 major_stop_rail San Joaquin Regional Rail Commission 922.80 \n", + "285 major_stop_rail Santa Clara Valley Transportation Authority 372.00 \n", + "286 major_stop_rail Sonoma-Marin Area Rail Transit District 1946.20 \n", + "287 major_stop_rail Southern California Regional Rail Authority 82.55 \n", + "\n", + " p20_pop p50_pop p80_pop p95_pop p5_jobs p20_jobs p50_jobs \\\n", + "0 32510.0 57999.0 112739.0 165692.00 3402.00 10678.0 19717.0 \n", + "1 8165.4 11509.0 13089.6 14306.40 11143.40 11402.6 12986.0 \n", + "2 14756.8 25210.0 42965.2 54929.40 1483.40 4884.6 10790.0 \n", + "3 2293.2 2735.0 2999.0 4756.90 736.50 839.6 1236.0 \n", + "4 5511.0 6705.5 8930.2 14957.25 5126.55 5839.2 7259.0 \n", + ".. ... ... ... ... ... ... ... \n", + "283 0.0 621.0 2451.6 3062.70 7570.00 8396.6 8801.0 \n", + "284 3344.6 4963.0 7300.8 12224.35 1329.45 2163.2 4626.0 \n", + "285 4056.6 9368.5 12127.0 13964.05 1451.30 3746.4 15232.5 \n", + "286 4546.0 6977.5 9425.0 11176.25 1362.65 2498.0 4282.0 \n", + "287 3029.6 5565.5 9434.2 11780.00 854.95 2498.8 6117.0 \n", + "\n", + " p80_jobs p95_jobs \n", + "0 82938.0 136575.00 \n", + "1 14761.6 16697.30 \n", + "2 19423.8 123508.40 \n", + "3 1504.0 2041.60 \n", + "4 10440.0 12140.80 \n", + ".. ... ... 
\n", + "283 8914.0 10498.40 \n", + "284 12453.2 16297.30 \n", + "285 28537.4 33144.45 \n", + "286 6514.0 7734.95 \n", + "287 11140.4 34857.00 \n", + "\n", + "[288 rows x 12 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_type_operator = stop_grouped >> group_by(_.hqta_type, _.agency_primary) >> summarize(p5_pop = _.total_people.quantile(.05), p20_pop = _.total_people.quantile(.2),\n", + " p50_pop = _.total_people.quantile(.5), \n", + " p80_pop = _.total_people.quantile(.8), p95_pop = _.total_people.quantile(.95),\n", + " p5_jobs = _.total_jobs.quantile(.05),\n", + " p20_jobs = _.total_jobs.quantile(.2), p50_jobs = _.total_jobs.quantile(.5),\n", + " p80_jobs = _.total_jobs.quantile(.8), p95_jobs = _.total_jobs.quantile(.95)\n", + " \n", + " )\n", + "by_type_operator" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "b37541b1-a5b7-4c5e-8a1b-ed619c1d7241", + "metadata": {}, + "outputs": [], + "source": [ + "by_type.to_csv('pop_jobs_by_major_stop_type.csv')\n", + "by_type_operator.to_csv('pop_jobs_by_major_stop_type_and_agency.csv')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}