Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tiffany switch imports from shared_utils to calitp_data_analysis #907

Merged
merged 14 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions bus_service_increase/A3_service_increase_estimator.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
"ix = pd.IndexSlice\n",
"\n",
"from utils import *\n",
"import shared_utils\n",
"\n",
"from siuba import *"
]
},
Expand Down Expand Up @@ -704,7 +702,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.13"
}
},
"nbformat": 4,
Expand Down
53 changes: 13 additions & 40 deletions bus_service_increase/C1_transit_near_highways.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,16 @@
],
"source": [
"import branca\n",
"import folium\n",
"import geopandas as gpd\n",
"import intake\n",
"import ipywidgets as widgets\n",
"import pandas as pd\n",
"\n",
"from IPython.display import Markdown, HTML\n",
"\n",
"import setup_corridors_stats\n",
"from create_parallel_corridors import IMG_PATH, DATA_PATH\n",
"from shared_utils import geography_utils\n",
"from shared_utils import calitp_color_palette as cp\n",
"from calitp_data_analysis import calitp_color_palette as cp\n",
"\n",
"catalog = intake.open_catalog(\"./*.yml\")"
]
Expand Down Expand Up @@ -145,53 +144,27 @@
" cp.CALITP_CATEGORY_BRIGHT_COLORS[0], #blue\n",
" cp.CALITP_CATEGORY_BRIGHT_COLORS[1] # orange\n",
" ],\n",
" )\n",
" \n",
" # Instead of using county centroid, calculate centroid from transit_df\n",
" # Otherwise, it's too zoomed out from where transit routes are\n",
" transit_centroid = (to_map\n",
" .to_crs(geography_utils.WGS84).geometry.centroid\n",
" .iloc[0]\n",
" )\n",
"\n",
" LAYERS_DICT = {\n",
" \"Highways\": {\"df\": hwy_df,\n",
" \"plot_col\": \"Route\",\n",
" \"popup_dict\": hwys_popup_dict, \n",
" \"tooltip_dict\": hwys_popup_dict,\n",
" \"colorscale\": hwys_color,\n",
" },\n",
" \"Transit Routes\": {\"df\": to_map,\n",
" \"plot_col\": \"parallel\",\n",
" \"popup_dict\": transit_popup_dict, \n",
" \"tooltip_dict\": transit_popup_dict,\n",
" \"colorscale\": colorscale,\n",
" },\n",
" }\n",
" ) \n",
" \n",
" LEGEND_URL = (\n",
" \"https://github.com/cal-itp/data-analyses/raw/\"\n",
" \"main/bus_service_increase/\"\n",
" \"img/legend_intersecting_parallel.png\"\n",
" )\n",
" \n",
" LEGEND_DICT = {\n",
" \"legend_url\": LEGEND_URL,\n",
" \"legend_bottom\": 85,\n",
" \"legend_left\": 5,\n",
" }\n",
" \n",
" fig = hwy_df.explore(\n",
" \"Route\", tiles = \"CartoDB Positron\",\n",
" cmap = colorscale, tooltip = list(hwys_popup_dict.keys()),\n",
" name = \"Highways\",\n",
" )\n",
" \n",
" fig = map_utils.make_folium_multiple_layers_map(\n",
" LAYERS_DICT,\n",
" fig_width = 700, fig_height = 700, \n",
" zoom=11, \n",
" centroid = [round(transit_centroid.y,2), \n",
" round(transit_centroid.x, 2)], \n",
" title=f\"Parallel vs Intersecting Lines for {to_map.itp_id.iloc[0]}\",\n",
" legend_dict = LEGEND_DICT\n",
" fig = to_map.explore(\"parallel\",\n",
" m=fig, cmap = colorscale, name=\"Transit Routes\",\n",
" tooltip = list(transit_popup_dict.keys())\n",
" )\n",
" \n",
" folium.LayerControl().add_to(fig)\n",
"\n",
" display(fig)\n",
" #fig.save(f\"{IMG_PATH}parallel_{operator_name}.html\")\n",
" #print(f\"{operator_name} map saved\")"
Expand Down
6 changes: 3 additions & 3 deletions bus_service_increase/C3_debug_notinshapes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
"\n",
"import create_parallel_corridors\n",
"from bus_service_utils import utils\n",
"from shared_utils import geography_utils\n",
"from shared_utils import calitp_color_palette as cp\n",
"from calitp_data_analysis import portfolio_utils\n",
"from calitp_data_analysis import calitp_color_palette as cp\n",
"\n",
"IMG_PATH = create_parallel_corridors.IMG_PATH\n",
"DATA_PATH = create_parallel_corridors.DATA_PATH\n",
Expand Down Expand Up @@ -553,7 +553,7 @@
" \"addl_service_hrs\", \"service_hours_annual\", \n",
" \"addl_service_hrs_annual\"\n",
" ]\n",
"a1 = geography_utils.aggregate_by_geography(service_increase,\n",
"a1 = portfolio_utils.aggregate_by_geography(service_increase,\n",
" group_cols = [\"itp_id\", \"day_name\", \"tract_type\"],\n",
" sum_cols = sum_cols,\n",
" )"
Expand Down
10 changes: 5 additions & 5 deletions bus_service_increase/C4_select_routes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"import shared_utils\n",
"from bus_service_utils import utils"
"from bus_service_utils import utils as bus_utils\n",
"from calitp_data_analysis import utils"
]
},
{
Expand All @@ -45,8 +45,8 @@
"metadata": {},
"outputs": [],
"source": [
"gdf = shared_utils.utils.download_geoparquet(utils.GCS_FILE_PATH, \n",
" \"parallel_or_intersecting\")"
"gdf = utils.download_geoparquet(utils.GCS_FILE_PATH, \n",
" \"parallel_or_intersecting\")"
]
},
{
Expand Down Expand Up @@ -136,7 +136,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
"version": "3.9.13"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions bus_service_increase/C7_target_highway_corridors.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
"\n",
"import setup_corridors_stats\n",
"from create_parallel_corridors import IMG_PATH, DATA_PATH\n",
"from shared_utils import geography_utils, styleguide\n",
"from shared_utils import calitp_color_palette as cp\n",
"from calitp_data_analysis import styleguide\n",
"from calitp_data_analysis import calitp_color_palette as cp\n",
"\n",
"alt.themes.register(\"calitp_theme\", styleguide.calitp_theme)\n",
"\n",
Expand Down
21 changes: 11 additions & 10 deletions bus_service_increase/D1_setup_parallel_trips_with_stops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@

os.environ["CALITP_BQ_MAX_BYTES"] = str(130_000_000_000)

import shared_utils
from bus_service_utils import utils
from shared_utils import gtfs_utils, rt_dates, rt_utils
from bus_service_utils import utils as bus_utils
from calitp_data_analysis import geography_utils, utils

ANALYSIS_DATE = shared_utils.rt_dates.PMAC["Q2_2022"]
COMPILED_CACHED = f"{shared_utils.rt_utils.GCS_FILE_PATH}compiled_cached_views/"
ANALYSIS_DATE = rt_dates.PMAC["Q2_2022"]
COMPILED_CACHED = f"{rt_utils.GCS_FILE_PATH}compiled_cached_views/"


def grab_service_hours(selected_date: str,
valid_trip_keys: list) -> pd.DataFrame:
daily_service_hours = shared_utils.gtfs_utils.get_trips(
daily_service_hours = gtfs_utils.get_trips(
selected_date = selected_date,
itp_id_list = None,
# Keep more columns, route_id, shape_id, direction_id so the metrolink fix
Expand All @@ -36,7 +37,7 @@ def grab_service_hours(selected_date: str,
)

daily_service_hours.to_parquet(
f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")


def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame:
Expand All @@ -45,7 +46,7 @@ def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame:
f"{COMPILED_CACHED}trips_{selected_date}.parquet")

daily_service_hours = pd.read_parquet(
f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")

df = dd.merge(
trips,
Expand Down Expand Up @@ -120,7 +121,7 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame,
stop_times_for_trip,
on = ["calitp_itp_id", "stop_id"],
how = "inner"
).to_crs(shared_utils.geography_utils.WGS84)
).to_crs(geography_utils.WGS84)


stop_times_with_geom2 = (stop_times_with_geom.drop(
Expand All @@ -146,8 +147,8 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame,

trips_with_stops = grab_stops_for_trip_selected(one_trip, ANALYSIS_DATE)

shared_utils.utils.geoparquet_gcs_export(
utils.geoparquet_gcs_export(
trips_with_stops,
utils.GCS_FILE_PATH,
bus_utils.GCS_FILE_PATH,
f"trips_with_stops_{ANALYSIS_DATE}"
)
18 changes: 9 additions & 9 deletions bus_service_increase/D4_make_gmaps_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from datetime import datetime
from loguru import logger

import shared_utils
from bus_service_utils import utils
from calitp_data_analysis import geography_utils, utils
from bus_service_utils import utils as bus_utils
from D1_setup_parallel_trips_with_stops import ANALYSIS_DATE, COMPILED_CACHED

logger.add("./logs/make_gmaps_results.log")
Expand All @@ -22,7 +22,7 @@
level="INFO")

DATA_PATH = "./gmaps_cache/"
GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/"
GCS_FILE_PATH = f"{bus_utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/"

def grab_cached_results(df: pd.DataFrame) -> (list, list):
result_ids = list(df.identifier_num)
Expand All @@ -32,7 +32,7 @@ def grab_cached_results(df: pd.DataFrame) -> (list, list):

for i in result_ids:
try:
json_dict = utils.open_request_json(i,
json_dict = bus_utils.open_request_json(i,
data_path = DATA_PATH,
gcs_file_path = GCS_FILE_PATH
)
Expand Down Expand Up @@ -71,7 +71,7 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame:
if __name__ == "__main__":
time0 = datetime.now()

df = pd.read_parquet(f"{utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet")
df = pd.read_parquet(f"{bus_utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet")

successful_ids, durations = grab_cached_results(df)
logger.info("Grabbed cached results")
Expand Down Expand Up @@ -106,11 +106,11 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame:
how = "inner",
# many on right because trip_ids can share same shape_id
validate = "1:m"
).to_crs(shared_utils.geography_utils.WGS84)
).to_crs(geography_utils.WGS84)

shared_utils.utils.geoparquet_gcs_export(gdf,
utils.GCS_FILE_PATH,
f"gmaps_results_{ANALYSIS_DATE}")
utils.geoparquet_gcs_export(gdf,
bus_utils.GCS_FILE_PATH,
f"gmaps_results_{ANALYSIS_DATE}")

end = datetime.now()
logger.info(f"Total execution: {end - time0}")
Expand Down
19 changes: 10 additions & 9 deletions bus_service_increase/D5_make_stripplot_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
from calitp_data_analysis.tables import tbls
from siuba import *

import shared_utils
import D2_setup_gmaps as setup_gmaps
import E2_aggregated_route_stats as aggregated_route_stats
from bus_service_utils import utils
from bus_service_utils import utils as bus_utils
from calitp_data_analysis import utils
from shared_utils import portfolio_utils, rt_utils
from D1_setup_parallel_trips_with_stops import (ANALYSIS_DATE, COMPILED_CACHED,
merge_trips_with_service_hours)

Expand Down Expand Up @@ -62,7 +63,7 @@ def add_trip_time_of_day(trips: pd.DataFrame) -> pd.DataFrame:
# Add time-of-day
df = df.assign(
time_of_day = df.apply(
lambda x: shared_utils.rt_utils.categorize_time_of_day(
lambda x: rt_utils.categorize_time_of_day(
x.trip_first_departure),
axis=1)
)
Expand Down Expand Up @@ -193,7 +194,7 @@ def add_route_group(df: gpd.GeoDataFrame,

# Use agency_name from our views.gtfs_schedule.agency instead of Airtable?
def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
agency_names = shared_utils.portfolio_utils.add_agency_name(
agency_names = portfolio_utils.add_agency_name(
selected_date = ANALYSIS_DATE)

df2 = pd.merge(
Expand All @@ -209,7 +210,7 @@ def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:

def merge_in_airtable(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
# Don't use name from Airtable. But, use district.
caltrans_districts = shared_utils.portfolio_utils.add_caltrans_district()
caltrans_districts = portfolio_utils.add_caltrans_district()

# Airtable gives us fewer duplicates than doing tbl.gtfs_schedule.agency()
# But naming should be done with tbl.gtfs_schedule.agency because that's what's used
Expand All @@ -231,7 +232,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
under quarterly performance objective work.
"""
route_categories = (gpd.read_parquet(
f"{utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet")
f"{bus_utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet")
.rename(columns = {"itp_id": "calitp_itp_id"})
)

Expand All @@ -244,7 +245,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
)

# Clean up route_name
route_names = shared_utils.portfolio_utils.add_route_name(ANALYSIS_DATE)
route_names = portfolio_utils.add_route_name(ANALYSIS_DATE)

gdf3 = pd.merge(
gdf2,
Expand Down Expand Up @@ -330,7 +331,7 @@ def assemble_data(analysis_date: str, threshold: float = 1.5,
gdf = assemble_data(ANALYSIS_DATE, threshold = 1.5,
service_time_cutoffs = SERVICE_TIME_CUTOFFS)

shared_utils.utils.geoparquet_gcs_export(
utils.geoparquet_gcs_export(
gdf,
utils.GCS_FILE_PATH,
bus_utils.GCS_FILE_PATH,
f"competitive_route_variability_{ANALYSIS_DATE}")
2 changes: 1 addition & 1 deletion bus_service_increase/E1_get_buses_on_shn.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS
from bus_service_utils import create_parallel_corridors, utils
from shared_utils import geography_utils, utils
from calitp_data_analysis import geography_utils, utils

catalog = intake.open_catalog("./*.yml")

Expand Down
9 changes: 4 additions & 5 deletions bus_service_increase/E2_aggregated_route_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
import geopandas as gpd
import pandas as pd

from shared_utils import (geography_utils, gtfs_utils,
rt_utils, portfolio_utils, utils
)
from shared_utils import gtfs_utils, portfolio_utils, rt_utils
from calitp_data_analysis import utils
from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS
from bus_service_utils import gtfs_build

Expand Down Expand Up @@ -254,7 +253,7 @@ def calculate_mean_speed_by_route(analysis_date: str,

# Each trip is 1 observation, just take the average (not weighted)
# to get route-level mean_speed_mph
mean_speed = geography_utils.aggregate_by_geography(
mean_speed = portfolio_utils.aggregate_by_geography(
df,
group_cols = group_cols,
mean_cols = ["mean_speed_mph"]
Expand All @@ -281,7 +280,7 @@ def get_competitive_routes() -> pd.DataFrame:
"num_competitive", "pct_trips_competitive",
]

route_df = geography_utils.aggregate_by_geography(
route_df = portfolio_utils.aggregate_by_geography(
trip_df,
group_cols = route_level_cols,
mean_cols = ["bus_multiplier", "bus_difference"],
Expand Down
Loading