diff --git a/rt_segment_speeds/23_project_all_vp_explore.ipynb b/rt_segment_speeds/23_project_all_vp_explore.ipynb index 190bc613d..4f682dd7f 100644 --- a/rt_segment_speeds/23_project_all_vp_explore.ipynb +++ b/rt_segment_speeds/23_project_all_vp_explore.ipynb @@ -17,13 +17,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "73ced776-d521-4467-beb4-8d67d147aea4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "DataTransformerRegistry.enable('default')" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import altair as alt\n", - "import dask.dataframe as dd\n", "import geopandas as gpd\n", "import numpy as np\n", "import pandas as pd\n", @@ -31,13 +41,16 @@ "from segment_speed_utils import helpers\n", "from segment_speed_utils.project_vars import SEGMENT_GCS, PROJECT_CRS\n", "from shared_utils import rt_dates\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "\n", + "analysis_date = rt_dates.DATES[\"sep2023\"]\n", "\n", - "analysis_date = rt_dates.DATES[\"sep2023\"]" + "alt.data_transformers.disable_max_rows()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "c289f689-c0fe-4ea2-9e14-4e80f77566ae", "metadata": {}, "outputs": [], @@ -62,7 +75,8 @@ "# Find whether it's loop or inlining\n", "shapes_loop_inlining = pd.read_parquet(\n", " f\"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet\",\n", - " columns = [\"shape_array_key\", \"loop_or_inlining\"]\n", + " columns = [\"shape_array_key\", \"loop_or_inlining\"],\n", + " filters = [[(\"loop_or_inlining\", \"==\", 1)]]\n", ").drop_duplicates().merge(\n", " trip_to_shape,\n", " on = \"shape_array_key\",\n", @@ -72,185 +86,1171 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "1b0ba620-e24e-4197-b665-c6111ad03307", + "execution_count": 3, + "id": "b95e35da-cf34-4696-80a1-0ab3a9e99ebf", "metadata": {}, "outputs": [], "source": [ - "ok_trips = (shapes_loop_inlining[\n", - " shapes_loop_inlining.loop_or_inlining==0]\n", - " .sample(25).trip_instance_key.tolist()\n", - " )\n", - "\n", - "ok_trips" + "loopy_trips = shapes_loop_inlining.trip_instance_key.tolist()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "b95e35da-cf34-4696-80a1-0ab3a9e99ebf", + "execution_count": 4, + "id": "20eb7d88-63e5-4695-82fd-141e8712396d", "metadata": {}, "outputs": [], "source": [ - "loopy_trips = (shapes_loop_inlining[\n", - " shapes_loop_inlining.loop_or_inlining==1]\n", - " .sample(25).trip_instance_key.tolist()\n", - " )\n", + "def merge_vp_with_projected_shape_meters(\n", + " analysis_date,\n", + " vp_filters: tuple\n", + "):\n", + " projected_shape_meters = pd.read_parquet(\n", + " f\"{SEGMENT_GCS}projection/vp_projected_{analysis_date}.parquet\"\n", + " )\n", "\n", - "loopy_trips" + " vp = pd.read_parquet(\n", + " f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n", + " filters = vp_filters,\n", + " columns = [\"gtfs_dataset_key\", \"gtfs_dataset_name\", \n", + " \"vp_idx\", \"trip_instance_key\", \"trip_id\"]\n", + " ).merge(\n", + " projected_shape_meters,\n", + " on = \"vp_idx\",\n", + " how = \"inner\"\n", + " )\n", + " \n", + " return vp" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "276cab58-94ff-4f72-a651-88a4f1ea890a", "metadata": {}, "outputs": [], "source": [ - "subset_trips = ok_trips + loopy_trips\n", - "\n", - "projected_shape_meters = pd.read_parquet(\n", - " f\"{SEGMENT_GCS}projection/vp_projected_{analysis_date}.parquet\"\n", - ")\n", - "\n", - "vp = pd.read_parquet(\n", - " f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n", - " filters = [[(\"trip_instance_key\", \"in\", subset_trips)]]\n", - ").merge(\n", - " projected_shape_meters,\n", - " on = \"vp_idx\",\n", - " how = \"inner\"\n", - ").drop(columns = \"location_timestamp\")" + "vp = merge_vp_with_projected_shape_meters(\n", + " analysis_date,\n", + " vp_filters = [[(\"trip_instance_key\", \"in\", loopy_trips)]]\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "9d8f339f-f832-45f2-9f2c-f96125799a38", + "execution_count": 6, + "id": "1fbfe823-6a98-40b8-8c71-87cf686e5830", "metadata": {}, "outputs": [], "source": [ - "def plot_shape_meters(df: pd.DataFrame, one_trip: str):\n", - " \"\"\"\n", - " Plot how the projected shape meters looks for one trip.\n", - " \n", - " vp_idx is ordered by timestamp, use as x.\n", - " \"\"\"\n", - " subset_df = df[df.trip_instance_key==one_trip]\n", - " \n", - " print(f\"{subset_df.gtfs_dataset_name.iloc[0]}\")\n", - " print(f\"trip_instance_key: {one_trip}, trip_id: {subset_df.trip_id.iloc[0]}\")\n", + "#https://stackoverflow.com/questions/10226551/whats-the-most-pythonic-way-to-calculate-percentage-changes-on-a-list-of-numbers\n", + "def pct_change(arr):\n", + " if not isinstance(arr, np.ndarray):\n", + " arr = np.asarray(arr)\n", + " \n", + " pct = np.zeros_like(arr)\n", + " pct[1:] = np.diff(arr) / np.abs(arr[:-1])\n", " \n", - " chart = (alt.Chart(subset_df)\n", - " .mark_line()\n", - " .encode(\n", - " x=\"vp_idx\",\n", - " y=\"shape_meters:Q\"\n", - " )\n", - " )\n", - " \n", - " display(chart)\n" + " return pct * 100\n", + "\n", + "def number_of_elements_over_threshold(\n", + " arr, \n", + " pct_change_threshold\n", + "):\n", + " return len((pct_change(arr) > pct_change_threshold).nonzero()[0])" ] }, { "cell_type": "code", - "execution_count": null, - "id": "6aaf6652-307b-41ba-9059-72ae010b7928", + "execution_count": 7, + "id": "de934f1c-f5db-4c89-ba5a-d84a48b18ce3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_871/1096090739.py:7: RuntimeWarning: divide by zero encountered in divide\n", + " pct[1:] = np.diff(arr) / np.abs(arr[:-1])\n", + "/tmp/ipykernel_871/1096090739.py:7: RuntimeWarning: invalid value encountered in divide\n", + " pct[1:] = np.diff(arr) / np.abs(arr[:-1])\n" + ] + } + ], "source": [ - "for t in ok_trips:\n", - " plot_shape_meters(vp, t)" + "# Find most jumpy trips\n", + "vp2 = (vp.groupby(\"trip_instance_key\")\n", + " .agg({\"shape_meters\": lambda x: list(x)})\n", + " .reset_index()\n", + " )\n", + "\n", + "vp2 = vp2.assign(\n", + " big_jumps = vp2.apply(\n", + " lambda x: \n", + " number_of_elements_over_threshold(\n", + " x.shape_meters, 20), axis=1\n", + " )\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "284fb053-ffc9-411a-ab46-82f6cfb08af1", + "execution_count": 27, + "id": "29a91576-ae53-4464-b0fa-40291431aa16", "metadata": {}, "outputs": [], "source": [ - "for t in loopy_trips:\n", - " plot_shape_meters(vp, t)" + "all_colors = (cp.CALITP_CATEGORY_BRIGHT_COLORS + cp.CALITP_CATEGORY_BOLD_COLORS + \n", + " cp.CALITP_DIVERGING_COLORS + cp.CALITP_SEQUENTIAL_COLORS\n", + " )\n", + "\n", + "\n", + "final_trip_keys = {\n", + " \"afaf17f1c2816652f0e4522a5c7f206b\": cp.CALITP_CATEGORY_BRIGHT_COLORS[1], # anaheim orange\n", + " \"a880d82a382929aa1de15be733f10a51\": cp.CALITP_CATEGORY_BOLD_COLORS[2], # kings green\n", + " \"d628de22f56dbb4c0e3f8242a2fe78d3\": cp.CALITP_CATEGORY_BRIGHT_COLORS[0], # vine blue\n", + " \"f1a0a79baa78db2c26b3248a30662a7b\": cp.CALITP_CATEGORY_BRIGHT_COLORS[5], # presidgo purple\n", + " \"26146503b5bf0235c0c8fe98dcd8d90b\": cp.CALITP_SEQUENTIAL_COLORS[4], # dumbarton navy\n", + " \"4f76b1c357ee534ac931a5c1bd1cbb87\": cp.CALITP_CATEGORY_BOLD_COLORS[2], # santa maria yellow\n", + " \"e5bcf460be0899f437b1e53b7f3feced\": cp.CALITP_CATEGORY_BOLD_COLORS[4], # tcrta gray blue\n", + " \"18cc0764a463566e8690f0d44c32c199\": cp.CALITP_CATEGORY_BRIGHT_COLORS[3], # sd green\n", + " \"45d8634a92be1fa10ae4f4aa5aa6d5b9\": cp.CALITP_CATEGORY_BRIGHT_COLORS[4], # foothill light blue\n", + " \"aa851696959462180fe04f189dc75584\": cp.CALITP_CATEGORY_BRIGHT_COLORS[3], # big blue bus green\n", + "}" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ced6dbff-008e-4ccf-b284-cc1d79d3e801", + "execution_count": 28, + "id": "6c7f04b1-9966-4c6c-980d-a63e61c814f1", "metadata": {}, "outputs": [], "source": [ - "speed = pd.read_parquet(\n", - " f\"{SEGMENT_GCS}speeds_comparison_{analysis_date}.parquet\"\n", + "shapes = helpers.import_scheduled_shapes(\n", + " analysis_date, \n", + " columns = [\"shape_array_key\", \"shape_id\", \"geometry\"],\n", + " get_pandas = True,\n", + " crs = \"EPSG:4326\"\n", + ").merge(\n", + " shapes_loop_inlining[\n", + " shapes_loop_inlining.trip_instance_key.isin(final_trip_keys.keys())],\n", + " on = \"shape_array_key\",\n", + " how = \"inner\"\n", ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "8243b13f-323b-4b0f-8fff-75cd2837d165", - "metadata": {}, - "outputs": [], - "source": [ - "trip = \"10096002510743-JUNE23\"\n", - "speed[speed.trip_id==trip]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "394956b9-a02c-4b7d-b604-3775c4e55a51", + "execution_count": 29, + "id": "2339c9ab-2241-4087-abf8-cee4d1b2a9cc", "metadata": {}, "outputs": [], "source": [ - "metro_trip = helpers.import_scheduled_trips(\n", - " analysis_date,\n", - " columns = [\"trip_instance_key\", \"trip_id\"],\n", - " filters = [[(\"trip_id\", \"==\", trip)]],\n", - " get_pandas = True\n", + "gdf = pd.merge(\n", + " shapes,\n", + " vp,\n", + " on = \"trip_instance_key\",\n", + " how = \"inner\"\n", ")\n", "\n", - "trip_key = metro_trip.trip_instance_key.iloc[0]" + "# assign colors\n", + "gdf = gdf.assign(\n", + " color = gdf.trip_instance_key.map(final_trip_keys),\n", + " gtfs_dataset_name = gdf.gtfs_dataset_name.str.replace(\"VehiclePositions\", \"Vehicle Positions\")\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "5c5b8f1d-6571-4072-a9ea-dc1c36173453", + "execution_count": 30, + "id": "9d8f339f-f832-45f2-9f2c-f96125799a38", "metadata": {}, "outputs": [], "source": [ - "vp_pared = pd.read_parquet(\n", - " f\"{SEGMENT_GCS}vp_pared_stops_{analysis_date}\",\n", - " filters = [[(\"trip_instance_key\", \"==\", trip_key)]])" + "def make_chart_map(df: pd.DataFrame, one_trip: str):\n", + " \"\"\"\n", + " Plot how the projected shape meters looks for one trip.\n", + " \n", + " vp_idx is ordered by timestamp, use as x.\n", + " \"\"\"\n", + " subset_df = df[df.trip_instance_key==one_trip].drop(columns = \"geometry\")\n", + " subset_gdf = df[df.trip_instance_key==one_trip][[\n", + " \"shape_id\", \"geometry\"]].drop_duplicates()\n", + " \n", + " GTFS_DATA = subset_df.gtfs_dataset_name.iloc[0]\n", + " TRIP_ID = subset_df.trip_id.iloc[0]\n", + " COLOR = subset_df.color.iloc[0]\n", + " \n", + " grid_bool = False\n", + " grid_opacity = 0\n", + " WIDTH = 400\n", + " HEIGHT = 400\n", + " \n", + " chart = (alt.Chart(subset_df)\n", + " .mark_line(color=COLOR)\n", + " .encode(\n", + " x=alt.X(\"vp_idx\", \n", + " title = \"\", \n", + " axis=alt.Axis(labels=False, \n", + " gridOpacity=grid_opacity, \n", + " tickOpacity=grid_opacity)\n", + " ),\n", + " y=alt.Y(\"shape_meters:Q\", \n", + " title = \"\",\n", + " axis=alt.Axis(labels=False, \n", + " gridOpacity=grid_opacity, \n", + " tickOpacity=grid_opacity)\n", + " ),\n", + " ).properties(\n", + " title = {\"text\": [GTFS_DATA], \n", + " \"subtitle\": f\"trip_id: {TRIP_ID}\"}\n", + " )\n", + " )\n", + " \n", + " chart = (chart\n", + " .configure_axis(grid=grid_bool, labelFontSize=0)\n", + " .configure_axis(gridOpacity=grid_opacity, \n", + " domainOpacity=grid_opacity)\n", + " .configure_axisBand(grid=grid_bool)\n", + " .configure_view(strokeOpacity=grid_opacity)\n", + " .properties(width = WIDTH*1.2, height = HEIGHT)\n", + " )\n", + " \n", + " display(chart)\n", + " \n", + " m = subset_gdf.explore(\n", + " \"shape_id\", \n", + " tiles = \"CartoDB Positron\",\n", + " legend = False,\n", + " style_kwds = {\n", + " \"color\": COLOR, \n", + " },\n", + " map_kwds = {\n", + " \"dragging\": False,\n", + " \"scrollWheelZoom\": False,\n", + " \"tileOpacity\": 0\n", + " },\n", + " zoom_control = False,\n", + " width = WIDTH, height = HEIGHT\n", + " )\n", + " \n", + " display(m)\n" ] }, { "cell_type": "code", - "execution_count": null, - "id": "c83f420e-99a2-40d8-b3ef-5d13f68b4226", + "execution_count": 31, + "id": "284fb053-ffc9-411a-ab46-82f6cfb08af1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "