Skip to content

Commit

Permalink
Merge pull request #1317 from cal-itp/ntd_changes2
Browse files Browse the repository at this point in the history
Exploring NTD Proposed Changes 25-26
  • Loading branch information
csuyat-dot authored Dec 5, 2024
2 parents dc5f6d7 + 0e2c33f commit 31686c5
Show file tree
Hide file tree
Showing 2 changed files with 1,561 additions and 1,357 deletions.
176 changes: 78 additions & 98 deletions gtfs_schedule/09_bus_stops_in_ca.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
{
"data": {
"text/plain": [
"array(['3', '4', '0', '2', '1', '0, 3', '5', '2, 3'], dtype=object)"
"array(['3', '2', '2, 3', '1', '4', '0', '0, 3', '5'], dtype=object)"
]
},
"metadata": {},
Expand Down Expand Up @@ -262,7 +262,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"(6981, 5)\n"
"(7026, 5)\n"
]
}
],
Expand Down Expand Up @@ -374,8 +374,8 @@
" <th>base64_url</th>\n",
" <th>gtfs_dataset_key</th>\n",
" <th>name</th>\n",
" <th>regional_feed_type</th>\n",
" <th>type</th>\n",
" <th>regional_feed_type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
Expand All @@ -388,8 +388,8 @@
" <td>aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...</td>\n",
" <td>239f3baf3dd3b9e9464f66a777f9897d</td>\n",
" <td>SBMTD Schedule</td>\n",
" <td>None</td>\n",
" <td>schedule</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
Expand All @@ -405,11 +405,11 @@
" base64_url \\\n",
"40 aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm... \n",
"\n",
" gtfs_dataset_key name regional_feed_type \\\n",
"40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule None \n",
" gtfs_dataset_key name type \\\n",
"40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule schedule \n",
"\n",
" type \n",
"40 schedule "
" regional_feed_type \n",
"40 None "
]
},
"execution_count": 18,
Expand Down Expand Up @@ -576,8 +576,8 @@
" <th>base64_url</th>\n",
" <th>gtfs_dataset_key</th>\n",
" <th>name</th>\n",
" <th>regional_feed_type</th>\n",
" <th>type</th>\n",
" <th>regional_feed_type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
Expand All @@ -590,8 +590,8 @@
" <td>aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...</td>\n",
" <td>239f3baf3dd3b9e9464f66a777f9897d</td>\n",
" <td>SBMTD Schedule</td>\n",
" <td>None</td>\n",
" <td>schedule</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
Expand All @@ -607,11 +607,11 @@
" base64_url \\\n",
"40 aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm... \n",
"\n",
" gtfs_dataset_key name regional_feed_type \\\n",
"40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule None \n",
" gtfs_dataset_key name type \\\n",
"40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule schedule \n",
"\n",
" type \n",
"40 schedule "
" regional_feed_type \n",
"40 None "
]
},
"execution_count": 22,
Expand Down Expand Up @@ -1024,44 +1024,44 @@
"output_type": "stream",
"text": [
"<class 'geopandas.geodataframe.GeoDataFrame'>\n",
"Int64Index: 14610 entries, 29 to 69071\n",
"Int64Index: 28088 entries, 3 to 69071\n",
"Data columns (total 16 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 org_id 14610 non-null object \n",
" 1 agency 14610 non-null object \n",
" 2 stop_id 14610 non-null object \n",
" 3 stop_name 14610 non-null object \n",
" 4 n_routes 13989 non-null float64 \n",
" 5 route_ids_served 13989 non-null object \n",
" 6 routetypes 13989 non-null object \n",
" 7 n_arrivals 13989 non-null float64 \n",
" 8 n_hours_in_service 13989 non-null float64 \n",
" 9 base64_url 14610 non-null object \n",
" 10 geometry 14610 non-null geometry\n",
" 11 date 14610 non-null object \n",
" 12 Route 14610 non-null int64 \n",
" 13 RouteType 14610 non-null object \n",
" 0 org_id 28088 non-null object \n",
" 1 agency 28088 non-null object \n",
" 2 stop_id 28088 non-null object \n",
" 3 stop_name 28088 non-null object \n",
" 4 n_routes 27467 non-null float64 \n",
" 5 route_ids_served 27467 non-null object \n",
" 6 routetypes 27467 non-null object \n",
" 7 n_arrivals 27467 non-null float64 \n",
" 8 n_hours_in_service 27467 non-null float64 \n",
" 9 base64_url 28088 non-null object \n",
" 10 geometry 28088 non-null geometry\n",
" 11 date 28088 non-null object \n",
" 12 Route 28088 non-null int64 \n",
" 13 RouteType 28088 non-null object \n",
" 14 route_id 621 non-null object \n",
" 15 route_type 621 non-null object \n",
"dtypes: float64(3), geometry(1), int64(1), object(11)\n",
"memory usage: 1.9+ MB\n",
"memory usage: 3.6+ MB\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 7216 entries, 0 to 7215\n",
"RangeIndex: 7261 entries, 0 to 7260\n",
"Data columns (total 9 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 agency 7216 non-null object \n",
" 1 stop_id 7216 non-null object \n",
" 2 stop_name 7216 non-null object \n",
" 3 shn_route_type 7216 non-null object \n",
" 4 date 7216 non-null object \n",
" 5 shn_route 7216 non-null int64 \n",
" 6 routetypes 7044 non-null object \n",
" 7 x 7216 non-null float64\n",
" 8 y 7216 non-null float64\n",
" 0 agency 7261 non-null object \n",
" 1 stop_id 7261 non-null object \n",
" 2 stop_name 7261 non-null object \n",
" 3 shn_route_type 7261 non-null object \n",
" 4 date 7261 non-null object \n",
" 5 shn_route 7261 non-null int64 \n",
" 6 routetypes 7089 non-null object \n",
" 7 x 7261 non-null float64\n",
" 8 y 7261 non-null float64\n",
"dtypes: float64(2), int64(1), object(6)\n",
"memory usage: 507.5+ KB\n"
"memory usage: 510.7+ KB\n"
]
},
{
Expand Down Expand Up @@ -1097,7 +1097,7 @@
"metadata": {},
"outputs": [],
"source": [
"stops_for_export.to_csv(\"ca_stops_revised.csv\", index=False)"
"stops_for_export.to_csv(f\"{SCHED_GCS}ca_stops_revised.csv\", index=False)"
]
},
{
Expand Down Expand Up @@ -1230,7 +1230,7 @@
"outputs": [
{
"ename": "KeyError",
"evalue": "'Palo Verde Valley Transit Agency'",
"evalue": "'Butte County Association of Governments'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
Expand All @@ -1241,7 +1241,7 @@
"File \u001b[0;32m/opt/conda/lib/python3.9/site-packages/pandas/core/apply.py:1174\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1172\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1173\u001b[0m values \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m)\u001b[38;5;241m.\u001b[39m_values\n\u001b[0;32m-> 1174\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1175\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1176\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1177\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1178\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1181\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1182\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n",
"File \u001b[0;32m/opt/conda/lib/python3.9/site-packages/pandas/_libs/lib.pyx:2924\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
"Cell \u001b[0;32mIn[52], line 1\u001b[0m, in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[0;32m----> 1\u001b[0m to_map[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolor\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m to_map\u001b[38;5;241m.\u001b[39magency\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: hex_to_rgb(\u001b[43mcolor_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mx\u001b[49m\u001b[43m]\u001b[49m[\u001b[38;5;241m1\u001b[39m:]))\n",
"\u001b[0;31mKeyError\u001b[0m: 'Palo Verde Valley Transit Agency'"
"\u001b[0;31mKeyError\u001b[0m: 'Butte County Association of Governments'"
]
}
],
Expand All @@ -1259,33 +1259,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:892: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:912: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"\n",
" centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"writing to calitp-map-tiles/shs_stops/shs.geojson.gz\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:892: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
" centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n",
"/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:912: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"\n",
" centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"writing to calitp-map-tiles/shs_stops/stops4.geojson.gz\n"
]
}
],
"source": [
Expand Down Expand Up @@ -1315,9 +1295,9 @@
" {'name': 'SHS with Stops Sep 2024',\n",
" 'url': 'https://storage.googleapis.com/calitp-map-tiles/shs_stops/stops4.geojson.gz',\n",
" 'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}}],\n",
" 'lat_lon': (36.067266532548935, -119.93886599913799),\n",
" 'lat_lon': (36.10085893610959, -119.97596894016574),\n",
" 'zoom': 13},\n",
" 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJNYXAiLCAidXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3Noc19zdG9wcy9zaHMuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiAic3RhdGVfaGlnaHdheV9uZXR3b3JrIn0sIHsibmFtZSI6ICJTSFMgd2l0aCBTdG9wcyBTZXAgMjAyNCIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc2hzX3N0b3BzL3N0b3BzNC5nZW9qc29uLmd6IiwgInByb3BlcnRpZXMiOiB7InN0cm9rZWQiOiBmYWxzZSwgImhpZ2hsaWdodF9zYXR1cmF0aW9uX211bHRpcGxpZXIiOiAwLjV9fV0sICJsYXRfbG9uIjogWzM2LjA2NzI2NjUzMjU0ODkzNSwgLTExOS45Mzg4NjU5OTkxMzc5OV0sICJ6b29tIjogMTN9'}"
" 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJNYXAiLCAidXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3Noc19zdG9wcy9zaHMuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiAic3RhdGVfaGlnaHdheV9uZXR3b3JrIn0sIHsibmFtZSI6ICJTSFMgd2l0aCBTdG9wcyBTZXAgMjAyNCIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc2hzX3N0b3BzL3N0b3BzNC5nZW9qc29uLmd6IiwgInByb3BlcnRpZXMiOiB7InN0cm9rZWQiOiBmYWxzZSwgImhpZ2hsaWdodF9zYXR1cmF0aW9uX211bHRpcGxpZXIiOiAwLjV9fV0sICJsYXRfbG9uIjogWzM2LjEwMDg1ODkzNjEwOTU5LCAtMTE5Ljk3NTk2ODk0MDE2NTc0XSwgInpvb20iOiAxM30='}"
]
},
"execution_count": 54,
Expand Down Expand Up @@ -1415,20 +1395,20 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 165 entries, 0 to 164\n",
"Int64Index: 166 entries, 0 to 165\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 organization_key 165 non-null object\n",
" 1 organization_name 165 non-null object\n",
" 2 service_key 165 non-null object\n",
" 3 service_name 165 non-null object\n",
" 4 funding_program_key 165 non-null object\n",
" 5 funding_program_name 165 non-null object\n",
" 0 organization_key 166 non-null object\n",
" 1 organization_name 166 non-null object\n",
" 2 service_key 166 non-null object\n",
" 3 service_name 166 non-null object\n",
" 4 funding_program_key 166 non-null object\n",
" 5 funding_program_name 166 non-null object\n",
"dtypes: object(6)\n",
"memory usage: 9.0+ KB\n",
"memory usage: 9.1+ KB\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 90 entries, 0 to 163\n",
"Int64Index: 90 entries, 0 to 164\n",
"Data columns (total 2 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
Expand Down Expand Up @@ -1532,30 +1512,30 @@
"output_type": "stream",
"text": [
"<class 'geopandas.geodataframe.GeoDataFrame'>\n",
"RangeIndex: 3390 entries, 0 to 3389\n",
"RangeIndex: 3470 entries, 0 to 3469\n",
"Data columns (total 18 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 org_id 3390 non-null object \n",
" 1 agency 3390 non-null object \n",
" 2 stop_id 3390 non-null object \n",
" 3 stop_name 3390 non-null object \n",
" 4 n_routes 3309 non-null float64 \n",
" 5 route_ids_served 3309 non-null object \n",
" 6 routetypes 3309 non-null object \n",
" 7 n_arrivals 3309 non-null float64 \n",
" 8 n_hours_in_service 3309 non-null float64 \n",
" 9 base64_url 3390 non-null object \n",
" 10 geometry 3390 non-null geometry\n",
" 11 date 3390 non-null object \n",
" 12 shn_route 3390 non-null int64 \n",
" 13 shn_route_type 3390 non-null object \n",
" 0 org_id 3470 non-null object \n",
" 1 agency 3470 non-null object \n",
" 2 stop_id 3470 non-null object \n",
" 3 stop_name 3470 non-null object \n",
" 4 n_routes 3389 non-null float64 \n",
" 5 route_ids_served 3389 non-null object \n",
" 6 routetypes 3389 non-null object \n",
" 7 n_arrivals 3389 non-null float64 \n",
" 8 n_hours_in_service 3389 non-null float64 \n",
" 9 base64_url 3470 non-null object \n",
" 10 geometry 3470 non-null geometry\n",
" 11 date 3470 non-null object \n",
" 12 shn_route 3470 non-null int64 \n",
" 13 shn_route_type 3470 non-null object \n",
" 14 route_id 81 non-null object \n",
" 15 route_type 81 non-null object \n",
" 16 organization_key 3390 non-null object \n",
" 17 5311_agency 3390 non-null bool \n",
" 16 organization_key 3470 non-null object \n",
" 17 5311_agency 3470 non-null bool \n",
"dtypes: bool(1), float64(3), geometry(1), int64(1), object(12)\n",
"memory usage: 453.7+ KB\n",
"memory usage: 464.4+ KB\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 69 entries, 0 to 68\n",
"Data columns (total 3 columns):\n",
Expand Down Expand Up @@ -1605,10 +1585,10 @@
"outputs": [],
"source": [
"# exporting gdf w/ 5311 flag to csv\n",
"gdf_merge.to_csv(\"ca_stops_revised_5311.csv\", index=False)\n",
"gdf_merge.to_csv(f\"{SCHED_GCS}ca_stops_revised_5311.csv\", index=False)\n",
"\n",
"# exporting gdf_add to csv\n",
"gdf_agg.to_csv(\"ca_stops_revised_5311_agencies.csv\", index=False)"
"gdf_agg.to_csv(f\"{SCHED_GCS}ca_stops_revised_5311_agencies.csv\", index=False)"
]
},
{
Expand Down
Loading

0 comments on commit 31686c5

Please sign in to comment.