Merge pull request #907 from cal-itp/tiffany-switch-imports
Tiffany switch imports from `shared_utils` to `calitp_data_analysis`
tiffanychu90 authored Sep 29, 2023
2 parents ddc3c78 + ba9bc38 commit 4c7fe4e
Showing 133 changed files with 405 additions and 504 deletions.
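Nearly every change below follows one pattern: general-purpose helpers (`utils`, `geography_utils`, `styleguide`, `calitp_color_palette`) are imported from `calitp_data_analysis` instead of `shared_utils`, and `bus_service_utils.utils` is aliased to `bus_utils` wherever it would otherwise collide with `calitp_data_analysis.utils`. A minimal before/after sketch of that pattern, using a toy GeoDataFrame (module paths are taken from the diff below; the export call is commented out because it needs GCS credentials, and the file name is a placeholder):

```python
import geopandas as gpd
from shapely.geometry import Point

# Toy stand-in for the GeoDataFrames used throughout these scripts
gdf = gpd.GeoDataFrame(
    {"id": [1]},
    geometry=[Point(-118.24, 34.05)],
    crs="EPSG:2229",
)

# Before: everything came from shared_utils
# import shared_utils
# from bus_service_utils import utils
# from shared_utils import calitp_color_palette as cp
# gdf = gdf.to_crs(shared_utils.geography_utils.WGS84)
# shared_utils.utils.geoparquet_gcs_export(gdf, utils.GCS_FILE_PATH, "my_file")

# After: helpers come from calitp_data_analysis; aliasing avoids the utils clash
from bus_service_utils import utils as bus_utils
from calitp_data_analysis import geography_utils, utils
from calitp_data_analysis import calitp_color_palette as cp

gdf = gdf.to_crs(geography_utils.WGS84)
# utils.geoparquet_gcs_export(gdf, bus_utils.GCS_FILE_PATH, "my_file")
```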
4 changes: 1 addition & 3 deletions bus_service_increase/A3_service_increase_estimator.ipynb
@@ -20,8 +20,6 @@
"ix = pd.IndexSlice\n",
"\n",
"from utils import *\n",
"import shared_utils\n",
"\n",
"from siuba import *"
]
},
@@ -704,7 +702,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.13"
}
},
"nbformat": 4,
53 changes: 13 additions & 40 deletions bus_service_increase/C1_transit_near_highways.ipynb
@@ -27,17 +27,16 @@
],
"source": [
"import branca\n",
"import folium\n",
"import geopandas as gpd\n",
"import intake\n",
"import ipywidgets as widgets\n",
"import pandas as pd\n",
"\n",
"from IPython.display import Markdown, HTML\n",
"\n",
"import setup_corridors_stats\n",
"from create_parallel_corridors import IMG_PATH, DATA_PATH\n",
"from shared_utils import geography_utils\n",
"from shared_utils import calitp_color_palette as cp\n",
"from calitp_data_analysis import calitp_color_palette as cp\n",
"\n",
"catalog = intake.open_catalog(\"./*.yml\")"
]
@@ -145,53 +144,27 @@
" cp.CALITP_CATEGORY_BRIGHT_COLORS[0], #blue\n",
" cp.CALITP_CATEGORY_BRIGHT_COLORS[1] # orange\n",
" ],\n",
" )\n",
" \n",
" # Instead of using county centroid, calculate centroid from transit_df\n",
" # Otherwise, it's too zoomed out from where transit routes are\n",
" transit_centroid = (to_map\n",
" .to_crs(geography_utils.WGS84).geometry.centroid\n",
" .iloc[0]\n",
" )\n",
"\n",
" LAYERS_DICT = {\n",
" \"Highways\": {\"df\": hwy_df,\n",
" \"plot_col\": \"Route\",\n",
" \"popup_dict\": hwys_popup_dict, \n",
" \"tooltip_dict\": hwys_popup_dict,\n",
" \"colorscale\": hwys_color,\n",
" },\n",
" \"Transit Routes\": {\"df\": to_map,\n",
" \"plot_col\": \"parallel\",\n",
" \"popup_dict\": transit_popup_dict, \n",
" \"tooltip_dict\": transit_popup_dict,\n",
" \"colorscale\": colorscale,\n",
" },\n",
" }\n",
" ) \n",
" \n",
" LEGEND_URL = (\n",
" \"https://github.com/cal-itp/data-analyses/raw/\"\n",
" \"main/bus_service_increase/\"\n",
" \"img/legend_intersecting_parallel.png\"\n",
" )\n",
" \n",
" LEGEND_DICT = {\n",
" \"legend_url\": LEGEND_URL,\n",
" \"legend_bottom\": 85,\n",
" \"legend_left\": 5,\n",
" }\n",
" \n",
" fig = hwy_df.explore(\n",
" \"Route\", tiles = \"CartoDB Positron\",\n",
" cmap = colorscale, tooltip = list(hwys_popup_dict.keys()),\n",
" name = \"Highways\",\n",
" )\n",
" \n",
" fig = map_utils.make_folium_multiple_layers_map(\n",
" LAYERS_DICT,\n",
" fig_width = 700, fig_height = 700, \n",
" zoom=11, \n",
" centroid = [round(transit_centroid.y,2), \n",
" round(transit_centroid.x, 2)], \n",
" title=f\"Parallel vs Intersecting Lines for {to_map.itp_id.iloc[0]}\",\n",
" legend_dict = LEGEND_DICT\n",
" fig = to_map.explore(\"parallel\",\n",
" m=fig, cmap = colorscale, name=\"Transit Routes\",\n",
" tooltip = list(transit_popup_dict.keys())\n",
" )\n",
" \n",
" folium.LayerControl().add_to(fig)\n",
"\n",
" display(fig)\n",
" #fig.save(f\"{IMG_PATH}parallel_{operator_name}.html\")\n",
" #print(f\"{operator_name} map saved\")"
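The C1 change above is more than an import swap: the custom `map_utils.make_folium_multiple_layers_map` wrapper, along with the centroid and legend setup that fed it, is replaced by chained `GeoDataFrame.explore()` calls drawing onto one shared folium map. A minimal sketch of that layering pattern, with toy data standing in for the highway and transit frames:

```python
import folium
import geopandas as gpd
from shapely.geometry import LineString

hwy_df = gpd.GeoDataFrame(
    {"Route": ["110"]},
    geometry=[LineString([(-118.28, 33.90), (-118.28, 34.10)])],
    crs="EPSG:4326",
)
to_map = gpd.GeoDataFrame(
    {"parallel": [1]},
    geometry=[LineString([(-118.29, 33.95), (-118.27, 34.05)])],
    crs="EPSG:4326",
)

# The first explore() call creates the underlying folium.Map
fig = hwy_df.explore("Route", tiles="CartoDB Positron", name="Highways")
# Subsequent layers render onto the same map by passing m=
fig = to_map.explore("parallel", m=fig, name="Transit Routes")
# Toggleable layer list, replacing the custom multi-layer wrapper
folium.LayerControl().add_to(fig)
fig  # in a notebook, this displays the interactive map
```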
6 changes: 3 additions & 3 deletions bus_service_increase/C3_debug_notinshapes.ipynb
@@ -51,8 +51,8 @@
"\n",
"import create_parallel_corridors\n",
"from bus_service_utils import utils\n",
"from shared_utils import geography_utils\n",
"from shared_utils import calitp_color_palette as cp\n",
"from calitp_data_analysis import portfolio_utils\n",
"from calitp_data_analysis import calitp_color_palette as cp\n",
"\n",
"IMG_PATH = create_parallel_corridors.IMG_PATH\n",
"DATA_PATH = create_parallel_corridors.DATA_PATH\n",
@@ -553,7 +553,7 @@
" \"addl_service_hrs\", \"service_hours_annual\", \n",
" \"addl_service_hrs_annual\"\n",
" ]\n",
"a1 = geography_utils.aggregate_by_geography(service_increase,\n",
"a1 = portfolio_utils.aggregate_by_geography(service_increase,\n",
" group_cols = [\"itp_id\", \"day_name\", \"tract_type\"],\n",
" sum_cols = sum_cols,\n",
" )"
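Note that C3 also swaps the aggregation helper's module: `geography_utils.aggregate_by_geography` becomes `portfolio_utils.aggregate_by_geography`, with `portfolio_utils` imported from `calitp_data_analysis` here but still from `shared_utils` in D5 and E2 below. Assuming the function keeps the keyword arguments visible in the diff (`group_cols`, `sum_cols`, `mean_cols`), only the module prefix changes at the call site; a hypothetical toy example:

```python
import pandas as pd
from calitp_data_analysis import portfolio_utils

# Hypothetical stand-in for the service_increase dataframe in C3
service_increase = pd.DataFrame({
    "itp_id": [1, 1, 2],
    "day_name": ["Monday", "Monday", "Tuesday"],
    "tract_type": ["urban", "urban", "rural"],
    "addl_service_hrs": [2.0, 3.0, 4.0],
})

a1 = portfolio_utils.aggregate_by_geography(
    service_increase,
    group_cols=["itp_id", "day_name", "tract_type"],
    sum_cols=["addl_service_hrs"],
)
```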
10 changes: 5 additions & 5 deletions bus_service_increase/C4_select_routes.ipynb
@@ -34,8 +34,8 @@
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"import shared_utils\n",
"from bus_service_utils import utils"
"from bus_service_utils import utils as bus_utils\n",
"from calitp_data_analysis import utils"
]
},
{
@@ -45,8 +45,8 @@
"metadata": {},
"outputs": [],
"source": [
"gdf = shared_utils.utils.download_geoparquet(utils.GCS_FILE_PATH, \n",
" \"parallel_or_intersecting\")"
"gdf = utils.download_geoparquet(utils.GCS_FILE_PATH, \n",
" \"parallel_or_intersecting\")"
]
},
{
@@ -136,7 +136,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
"version": "3.9.13"
}
},
"nbformat": 4,
4 changes: 2 additions & 2 deletions bus_service_increase/C7_target_highway_corridors.ipynb
@@ -31,8 +31,8 @@
"\n",
"import setup_corridors_stats\n",
"from create_parallel_corridors import IMG_PATH, DATA_PATH\n",
"from shared_utils import geography_utils, styleguide\n",
"from shared_utils import calitp_color_palette as cp\n",
"from calitp_data_analysis import styleguide\n",
"from calitp_data_analysis import calitp_color_palette as cp\n",
"\n",
"alt.themes.register(\"calitp_theme\", styleguide.calitp_theme)\n",
"\n",
21 changes: 11 additions & 10 deletions bus_service_increase/D1_setup_parallel_trips_with_stops.py
@@ -15,16 +15,17 @@

os.environ["CALITP_BQ_MAX_BYTES"] = str(130_000_000_000)

import shared_utils
from bus_service_utils import utils
from shared_utils import gtfs_utils, rt_dates, rt_utils
from bus_service_utils import utils as bus_utils
from calitp_data_analysis import geography_utils, utils

ANALYSIS_DATE = shared_utils.rt_dates.PMAC["Q2_2022"]
COMPILED_CACHED = f"{shared_utils.rt_utils.GCS_FILE_PATH}compiled_cached_views/"
ANALYSIS_DATE = rt_dates.PMAC["Q2_2022"]
COMPILED_CACHED = f"{rt_utils.GCS_FILE_PATH}compiled_cached_views/"


def grab_service_hours(selected_date: str,
valid_trip_keys: list) -> pd.DataFrame:
daily_service_hours = shared_utils.gtfs_utils.get_trips(
daily_service_hours = gtfs_utils.get_trips(
selected_date = selected_date,
itp_id_list = None,
# Keep more columns, route_id, shape_id, direction_id so the metrolink fix
@@ -36,7 +37,7 @@ def grab_service_hours(selected_date: str,
)

daily_service_hours.to_parquet(
f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")


def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame:
@@ -45,7 +46,7 @@ def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame:
f"{COMPILED_CACHED}trips_{selected_date}.parquet")

daily_service_hours = pd.read_parquet(
f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")

df = dd.merge(
trips,
@@ -120,7 +121,7 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame,
stop_times_for_trip,
on = ["calitp_itp_id", "stop_id"],
how = "inner"
).to_crs(shared_utils.geography_utils.WGS84)
).to_crs(geography_utils.WGS84)


stop_times_with_geom2 = (stop_times_with_geom.drop(
@@ -146,8 +147,8 @@

trips_with_stops = grab_stops_for_trip_selected(one_trip, ANALYSIS_DATE)

shared_utils.utils.geoparquet_gcs_export(
utils.geoparquet_gcs_export(
trips_with_stops,
utils.GCS_FILE_PATH,
bus_utils.GCS_FILE_PATH,
f"trips_with_stops_{ANALYSIS_DATE}"
)
18 changes: 9 additions & 9 deletions bus_service_increase/D4_make_gmaps_results.py
@@ -12,8 +12,8 @@
from datetime import datetime
from loguru import logger

import shared_utils
from bus_service_utils import utils
from calitp_data_analysis import geography_utils, utils
from bus_service_utils import utils as bus_utils
from D1_setup_parallel_trips_with_stops import ANALYSIS_DATE, COMPILED_CACHED

logger.add("./logs/make_gmaps_results.log")
@@ -22,7 +22,7 @@
level="INFO")

DATA_PATH = "./gmaps_cache/"
GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/"
GCS_FILE_PATH = f"{bus_utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/"

def grab_cached_results(df: pd.DataFrame) -> (list, list):
result_ids = list(df.identifier_num)
@@ -32,7 +32,7 @@ def grab_cached_results(df: pd.DataFrame) -> (list, list):

for i in result_ids:
try:
json_dict = utils.open_request_json(i,
json_dict = bus_utils.open_request_json(i,
data_path = DATA_PATH,
gcs_file_path = GCS_FILE_PATH
)
@@ -71,7 +71,7 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame:
if __name__ == "__main__":
time0 = datetime.now()

df = pd.read_parquet(f"{utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet")
df = pd.read_parquet(f"{bus_utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet")

successful_ids, durations = grab_cached_results(df)
logger.info("Grabbed cached results")
@@ -106,11 +106,11 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame:
how = "inner",
# many on right because trip_ids can share same shape_id
validate = "1:m"
).to_crs(shared_utils.geography_utils.WGS84)
).to_crs(geography_utils.WGS84)

shared_utils.utils.geoparquet_gcs_export(gdf,
utils.GCS_FILE_PATH,
f"gmaps_results_{ANALYSIS_DATE}")
utils.geoparquet_gcs_export(gdf,
bus_utils.GCS_FILE_PATH,
f"gmaps_results_{ANALYSIS_DATE}")

end = datetime.now()
logger.info(f"Total execution: {end - time0}")
19 changes: 10 additions & 9 deletions bus_service_increase/D5_make_stripplot_data.py
@@ -13,10 +13,11 @@
from calitp_data_analysis.tables import tbls
from siuba import *

import shared_utils
import D2_setup_gmaps as setup_gmaps
import E2_aggregated_route_stats as aggregated_route_stats
from bus_service_utils import utils
from bus_service_utils import utils as bus_utils
from calitp_data_analysis import utils
from shared_utils import portfolio_utils, rt_utils
from D1_setup_parallel_trips_with_stops import (ANALYSIS_DATE, COMPILED_CACHED,
merge_trips_with_service_hours)

@@ -62,7 +63,7 @@ def add_trip_time_of_day(trips: pd.DataFrame) -> pd.DataFrame:
# Add time-of-day
df = df.assign(
time_of_day = df.apply(
lambda x: shared_utils.rt_utils.categorize_time_of_day(
lambda x: rt_utils.categorize_time_of_day(
x.trip_first_departure),
axis=1)
)
@@ -193,7 +194,7 @@ def add_route_group(df: gpd.GeoDataFrame,

# Use agency_name from our views.gtfs_schedule.agency instead of Airtable?
def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
agency_names = shared_utils.portfolio_utils.add_agency_name(
agency_names = portfolio_utils.add_agency_name(
selected_date = ANALYSIS_DATE)

df2 = pd.merge(
@@ -209,7 +210,7 @@ def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:

def merge_in_airtable(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
# Don't use name from Airtable. But, use district.
caltrans_districts = shared_utils.portfolio_utils.add_caltrans_district()
caltrans_districts = portfolio_utils.add_caltrans_district()

# Airtable gives us fewer duplicates than doing tbl.gtfs_schedule.agency()
# But naming should be done with tbl.gtfs_schedule.agency because that's what's used
@@ -231,7 +232,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
under quarterly performance objective work.
"""
route_categories = (gpd.read_parquet(
f"{utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet")
f"{bus_utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet")
.rename(columns = {"itp_id": "calitp_itp_id"})
)

@@ -244,7 +245,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
)

# Clean up route_name
route_names = shared_utils.portfolio_utils.add_route_name(ANALYSIS_DATE)
route_names = portfolio_utils.add_route_name(ANALYSIS_DATE)

gdf3 = pd.merge(
gdf2,
@@ -330,7 +331,7 @@ def assemble_data(analysis_date: str, threshold: float = 1.5,
gdf = assemble_data(ANALYSIS_DATE, threshold = 1.5,
service_time_cutoffs = SERVICE_TIME_CUTOFFS)

shared_utils.utils.geoparquet_gcs_export(
utils.geoparquet_gcs_export(
gdf,
utils.GCS_FILE_PATH,
bus_utils.GCS_FILE_PATH,
f"competitive_route_variability_{ANALYSIS_DATE}")
2 changes: 1 addition & 1 deletion bus_service_increase/E1_get_buses_on_shn.py
@@ -13,7 +13,7 @@

from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS
from bus_service_utils import create_parallel_corridors, utils
from shared_utils import geography_utils, utils
from calitp_data_analysis import geography_utils, utils

catalog = intake.open_catalog("./*.yml")

9 changes: 4 additions & 5 deletions bus_service_increase/E2_aggregated_route_stats.py
@@ -18,9 +18,8 @@
import geopandas as gpd
import pandas as pd

from shared_utils import (geography_utils, gtfs_utils,
rt_utils, portfolio_utils, utils
)
from shared_utils import gtfs_utils, portfolio_utils, rt_utils
from calitp_data_analysis import utils
from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS
from bus_service_utils import gtfs_build

@@ -254,7 +253,7 @@

# Each trip is 1 observation, just take the average (not weighted)
# to get route-level mean_speed_mph
mean_speed = geography_utils.aggregate_by_geography(
mean_speed = portfolio_utils.aggregate_by_geography(
df,
group_cols = group_cols,
mean_cols = ["mean_speed_mph"]
@@ -281,7 +280,7 @@ def get_competitive_routes() -> pd.DataFrame:
"num_competitive", "pct_trips_competitive",
]

route_df = geography_utils.aggregate_by_geography(
route_df = portfolio_utils.aggregate_by_geography(
trip_df,
group_cols = route_level_cols,
mean_cols = ["bus_multiplier", "bus_difference"],
…
