Merge pull request #1317 from cal-itp/ntd_changes2

Exploring NTD Proposed Changes 25-26
cal-itp · Dec 5, 2024 · 31686c5 · 31686c5
2 parents dc5f6d7 + 0e2c33f
commit 31686c5
Show file tree

Hide file tree

Showing 2 changed files with 1,561 additions and 1,357 deletions.
diff --git a/gtfs_schedule/09_bus_stops_in_ca.ipynb b/gtfs_schedule/09_bus_stops_in_ca.ipynb
@@ -179,7 +179,7 @@
     {
      "data": {
       "text/plain": [
-       "array(['3', '4', '0', '2', '1', '0, 3', '5', '2, 3'], dtype=object)"
+       "array(['3', '2', '2, 3', '1', '4', '0', '0, 3', '5'], dtype=object)"
       ]
      },
      "metadata": {},
@@ -262,7 +262,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "(6981, 5)\n"
+      "(7026, 5)\n"
      ]
     }
    ],
@@ -374,8 +374,8 @@
        "      <th>base64_url</th>\n",
        "      <th>gtfs_dataset_key</th>\n",
        "      <th>name</th>\n",
-       "      <th>regional_feed_type</th>\n",
        "      <th>type</th>\n",
+       "      <th>regional_feed_type</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -388,8 +388,8 @@
        "      <td>aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...</td>\n",
        "      <td>239f3baf3dd3b9e9464f66a777f9897d</td>\n",
        "      <td>SBMTD Schedule</td>\n",
-       "      <td>None</td>\n",
        "      <td>schedule</td>\n",
+       "      <td>None</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -405,11 +405,11 @@
        "                                           base64_url  \\\n",
        "40  aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...   \n",
        "\n",
-       "                    gtfs_dataset_key            name regional_feed_type  \\\n",
-       "40  239f3baf3dd3b9e9464f66a777f9897d  SBMTD Schedule               None   \n",
+       "                    gtfs_dataset_key            name      type  \\\n",
+       "40  239f3baf3dd3b9e9464f66a777f9897d  SBMTD Schedule  schedule   \n",
        "\n",
-       "        type  \n",
-       "40  schedule  "
+       "   regional_feed_type  \n",
+       "40               None  "
       ]
      },
      "execution_count": 18,
@@ -576,8 +576,8 @@
        "      <th>base64_url</th>\n",
        "      <th>gtfs_dataset_key</th>\n",
        "      <th>name</th>\n",
-       "      <th>regional_feed_type</th>\n",
        "      <th>type</th>\n",
+       "      <th>regional_feed_type</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -590,8 +590,8 @@
        "      <td>aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...</td>\n",
        "      <td>239f3baf3dd3b9e9464f66a777f9897d</td>\n",
        "      <td>SBMTD Schedule</td>\n",
-       "      <td>None</td>\n",
        "      <td>schedule</td>\n",
+       "      <td>None</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -607,11 +607,11 @@
        "                                           base64_url  \\\n",
        "40  aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...   \n",
        "\n",
-       "                    gtfs_dataset_key            name regional_feed_type  \\\n",
-       "40  239f3baf3dd3b9e9464f66a777f9897d  SBMTD Schedule               None   \n",
+       "                    gtfs_dataset_key            name      type  \\\n",
+       "40  239f3baf3dd3b9e9464f66a777f9897d  SBMTD Schedule  schedule   \n",
        "\n",
-       "        type  \n",
-       "40  schedule  "
+       "   regional_feed_type  \n",
+       "40               None  "
       ]
      },
      "execution_count": 22,
@@ -1024,44 +1024,44 @@
      "output_type": "stream",
      "text": [
       "<class 'geopandas.geodataframe.GeoDataFrame'>\n",
-      "Int64Index: 14610 entries, 29 to 69071\n",
+      "Int64Index: 28088 entries, 3 to 69071\n",
       "Data columns (total 16 columns):\n",
       " #   Column              Non-Null Count  Dtype   \n",
       "---  ------              --------------  -----   \n",
-      " 0   org_id              14610 non-null  object  \n",
-      " 1   agency              14610 non-null  object  \n",
-      " 2   stop_id             14610 non-null  object  \n",
-      " 3   stop_name           14610 non-null  object  \n",
-      " 4   n_routes            13989 non-null  float64 \n",
-      " 5   route_ids_served    13989 non-null  object  \n",
-      " 6   routetypes          13989 non-null  object  \n",
-      " 7   n_arrivals          13989 non-null  float64 \n",
-      " 8   n_hours_in_service  13989 non-null  float64 \n",
-      " 9   base64_url          14610 non-null  object  \n",
-      " 10  geometry            14610 non-null  geometry\n",
-      " 11  date                14610 non-null  object  \n",
-      " 12  Route               14610 non-null  int64   \n",
-      " 13  RouteType           14610 non-null  object  \n",
+      " 0   org_id              28088 non-null  object  \n",
+      " 1   agency              28088 non-null  object  \n",
+      " 2   stop_id             28088 non-null  object  \n",
+      " 3   stop_name           28088 non-null  object  \n",
+      " 4   n_routes            27467 non-null  float64 \n",
+      " 5   route_ids_served    27467 non-null  object  \n",
+      " 6   routetypes          27467 non-null  object  \n",
+      " 7   n_arrivals          27467 non-null  float64 \n",
+      " 8   n_hours_in_service  27467 non-null  float64 \n",
+      " 9   base64_url          28088 non-null  object  \n",
+      " 10  geometry            28088 non-null  geometry\n",
+      " 11  date                28088 non-null  object  \n",
+      " 12  Route               28088 non-null  int64   \n",
+      " 13  RouteType           28088 non-null  object  \n",
       " 14  route_id            621 non-null    object  \n",
       " 15  route_type          621 non-null    object  \n",
       "dtypes: float64(3), geometry(1), int64(1), object(11)\n",
-      "memory usage: 1.9+ MB\n",
+      "memory usage: 3.6+ MB\n",
       "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 7216 entries, 0 to 7215\n",
+      "RangeIndex: 7261 entries, 0 to 7260\n",
       "Data columns (total 9 columns):\n",
       " #   Column          Non-Null Count  Dtype  \n",
       "---  ------          --------------  -----  \n",
-      " 0   agency          7216 non-null   object \n",
-      " 1   stop_id         7216 non-null   object \n",
-      " 2   stop_name       7216 non-null   object \n",
-      " 3   shn_route_type  7216 non-null   object \n",
-      " 4   date            7216 non-null   object \n",
-      " 5   shn_route       7216 non-null   int64  \n",
-      " 6   routetypes      7044 non-null   object \n",
-      " 7   x               7216 non-null   float64\n",
-      " 8   y               7216 non-null   float64\n",
+      " 0   agency          7261 non-null   object \n",
+      " 1   stop_id         7261 non-null   object \n",
+      " 2   stop_name       7261 non-null   object \n",
+      " 3   shn_route_type  7261 non-null   object \n",
+      " 4   date            7261 non-null   object \n",
+      " 5   shn_route       7261 non-null   int64  \n",
+      " 6   routetypes      7089 non-null   object \n",
+      " 7   x               7261 non-null   float64\n",
+      " 8   y               7261 non-null   float64\n",
       "dtypes: float64(2), int64(1), object(6)\n",
-      "memory usage: 507.5+ KB\n"
+      "memory usage: 510.7+ KB\n"
      ]
     },
     {
@@ -1097,7 +1097,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "stops_for_export.to_csv(\"ca_stops_revised.csv\", index=False)"
+    "stops_for_export.to_csv(f\"{SCHED_GCS}ca_stops_revised.csv\", index=False)"
    ]
   },
   {
@@ -1230,7 +1230,7 @@
    "outputs": [
     {
      "ename": "KeyError",
-     "evalue": "'Palo Verde Valley Transit Agency'",
+     "evalue": "'Butte County Association of Governments'",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
@@ -1241,7 +1241,7 @@
       "File \u001b[0;32m/opt/conda/lib/python3.9/site-packages/pandas/core/apply.py:1174\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1172\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1173\u001b[0m         values \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m)\u001b[38;5;241m.\u001b[39m_values\n\u001b[0;32m-> 1174\u001b[0m         mapped \u001b[38;5;241m=\u001b[39m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1175\u001b[0m \u001b[43m            \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1176\u001b[0m \u001b[43m            \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1177\u001b[0m \u001b[43m            \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1178\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m   1181\u001b[0m     \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m   1182\u001b[0m     \u001b[38;5;66;03m#  See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m   1183\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n",
       "File \u001b[0;32m/opt/conda/lib/python3.9/site-packages/pandas/_libs/lib.pyx:2924\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
       "Cell \u001b[0;32mIn[52], line 1\u001b[0m, in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[0;32m----> 1\u001b[0m to_map[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolor\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m to_map\u001b[38;5;241m.\u001b[39magency\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: hex_to_rgb(\u001b[43mcolor_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mx\u001b[49m\u001b[43m]\u001b[49m[\u001b[38;5;241m1\u001b[39m:]))\n",
-      "\u001b[0;31mKeyError\u001b[0m: 'Palo Verde Valley Transit Agency'"
+      "\u001b[0;31mKeyError\u001b[0m: 'Butte County Association of Governments'"
      ]
     }
    ],
@@ -1259,33 +1259,13 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:892: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
+      "/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:912: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
       "\n",
-      "  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "writing to calitp-map-tiles/shs_stops/shs.geojson.gz\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:892: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
+      "  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n",
+      "/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:912: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
       "\n",
       "  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n"
      ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "writing to calitp-map-tiles/shs_stops/stops4.geojson.gz\n"
-     ]
     }
    ],
    "source": [
@@ -1315,9 +1295,9 @@
        "   {'name': 'SHS with Stops Sep 2024',\n",
        "    'url': 'https://storage.googleapis.com/calitp-map-tiles/shs_stops/stops4.geojson.gz',\n",
        "    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}}],\n",
-       "  'lat_lon': (36.067266532548935, -119.93886599913799),\n",
+       "  'lat_lon': (36.10085893610959, -119.97596894016574),\n",
        "  'zoom': 13},\n",
-       " 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJNYXAiLCAidXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3Noc19zdG9wcy9zaHMuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiAic3RhdGVfaGlnaHdheV9uZXR3b3JrIn0sIHsibmFtZSI6ICJTSFMgd2l0aCBTdG9wcyBTZXAgMjAyNCIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc2hzX3N0b3BzL3N0b3BzNC5nZW9qc29uLmd6IiwgInByb3BlcnRpZXMiOiB7InN0cm9rZWQiOiBmYWxzZSwgImhpZ2hsaWdodF9zYXR1cmF0aW9uX211bHRpcGxpZXIiOiAwLjV9fV0sICJsYXRfbG9uIjogWzM2LjA2NzI2NjUzMjU0ODkzNSwgLTExOS45Mzg4NjU5OTkxMzc5OV0sICJ6b29tIjogMTN9'}"
+       " 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJNYXAiLCAidXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3Noc19zdG9wcy9zaHMuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiAic3RhdGVfaGlnaHdheV9uZXR3b3JrIn0sIHsibmFtZSI6ICJTSFMgd2l0aCBTdG9wcyBTZXAgMjAyNCIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc2hzX3N0b3BzL3N0b3BzNC5nZW9qc29uLmd6IiwgInByb3BlcnRpZXMiOiB7InN0cm9rZWQiOiBmYWxzZSwgImhpZ2hsaWdodF9zYXR1cmF0aW9uX211bHRpcGxpZXIiOiAwLjV9fV0sICJsYXRfbG9uIjogWzM2LjEwMDg1ODkzNjEwOTU5LCAtMTE5Ljk3NTk2ODk0MDE2NTc0XSwgInpvb20iOiAxM30='}"
       ]
      },
      "execution_count": 54,
@@ -1415,20 +1395,20 @@
      "output_type": "stream",
      "text": [
       "<class 'pandas.core.frame.DataFrame'>\n",
-      "Int64Index: 165 entries, 0 to 164\n",
+      "Int64Index: 166 entries, 0 to 165\n",
       "Data columns (total 6 columns):\n",
       " #   Column                Non-Null Count  Dtype \n",
       "---  ------                --------------  ----- \n",
-      " 0   organization_key      165 non-null    object\n",
-      " 1   organization_name     165 non-null    object\n",
-      " 2   service_key           165 non-null    object\n",
-      " 3   service_name          165 non-null    object\n",
-      " 4   funding_program_key   165 non-null    object\n",
-      " 5   funding_program_name  165 non-null    object\n",
+      " 0   organization_key      166 non-null    object\n",
+      " 1   organization_name     166 non-null    object\n",
+      " 2   service_key           166 non-null    object\n",
+      " 3   service_name          166 non-null    object\n",
+      " 4   funding_program_key   166 non-null    object\n",
+      " 5   funding_program_name  166 non-null    object\n",
       "dtypes: object(6)\n",
-      "memory usage: 9.0+ KB\n",
+      "memory usage: 9.1+ KB\n",
       "<class 'pandas.core.frame.DataFrame'>\n",
-      "Int64Index: 90 entries, 0 to 163\n",
+      "Int64Index: 90 entries, 0 to 164\n",
       "Data columns (total 2 columns):\n",
       " #   Column            Non-Null Count  Dtype \n",
       "---  ------            --------------  ----- \n",
@@ -1532,30 +1512,30 @@
      "output_type": "stream",
      "text": [
       "<class 'geopandas.geodataframe.GeoDataFrame'>\n",
-      "RangeIndex: 3390 entries, 0 to 3389\n",
+      "RangeIndex: 3470 entries, 0 to 3469\n",
       "Data columns (total 18 columns):\n",
       " #   Column              Non-Null Count  Dtype   \n",
       "---  ------              --------------  -----   \n",
-      " 0   org_id              3390 non-null   object  \n",
-      " 1   agency              3390 non-null   object  \n",
-      " 2   stop_id             3390 non-null   object  \n",
-      " 3   stop_name           3390 non-null   object  \n",
-      " 4   n_routes            3309 non-null   float64 \n",
-      " 5   route_ids_served    3309 non-null   object  \n",
-      " 6   routetypes          3309 non-null   object  \n",
-      " 7   n_arrivals          3309 non-null   float64 \n",
-      " 8   n_hours_in_service  3309 non-null   float64 \n",
-      " 9   base64_url          3390 non-null   object  \n",
-      " 10  geometry            3390 non-null   geometry\n",
-      " 11  date                3390 non-null   object  \n",
-      " 12  shn_route           3390 non-null   int64   \n",
-      " 13  shn_route_type      3390 non-null   object  \n",
+      " 0   org_id              3470 non-null   object  \n",
+      " 1   agency              3470 non-null   object  \n",
+      " 2   stop_id             3470 non-null   object  \n",
+      " 3   stop_name           3470 non-null   object  \n",
+      " 4   n_routes            3389 non-null   float64 \n",
+      " 5   route_ids_served    3389 non-null   object  \n",
+      " 6   routetypes          3389 non-null   object  \n",
+      " 7   n_arrivals          3389 non-null   float64 \n",
+      " 8   n_hours_in_service  3389 non-null   float64 \n",
+      " 9   base64_url          3470 non-null   object  \n",
+      " 10  geometry            3470 non-null   geometry\n",
+      " 11  date                3470 non-null   object  \n",
+      " 12  shn_route           3470 non-null   int64   \n",
+      " 13  shn_route_type      3470 non-null   object  \n",
       " 14  route_id            81 non-null     object  \n",
       " 15  route_type          81 non-null     object  \n",
-      " 16  organization_key    3390 non-null   object  \n",
-      " 17  5311_agency         3390 non-null   bool    \n",
+      " 16  organization_key    3470 non-null   object  \n",
+      " 17  5311_agency         3470 non-null   bool    \n",
       "dtypes: bool(1), float64(3), geometry(1), int64(1), object(12)\n",
-      "memory usage: 453.7+ KB\n",
+      "memory usage: 464.4+ KB\n",
       "<class 'pandas.core.frame.DataFrame'>\n",
       "RangeIndex: 69 entries, 0 to 68\n",
       "Data columns (total 3 columns):\n",
@@ -1605,10 +1585,10 @@
    "outputs": [],
    "source": [
     "# exporting gdf w/ 5311 flag to csv\n",
-    "gdf_merge.to_csv(\"ca_stops_revised_5311.csv\", index=False)\n",
+    "gdf_merge.to_csv(f\"{SCHED_GCS}ca_stops_revised_5311.csv\", index=False)\n",
     "\n",
     "# exporting gdf_add to csv\n",
-    "gdf_agg.to_csv(\"ca_stops_revised_5311_agencies.csv\", index=False)"
+    "gdf_agg.to_csv(f\"{SCHED_GCS}ca_stops_revised_5311_agencies.csv\", index=False)"
    ]
   },
   {