From a445f101fad5b2d9b737f3b393c05d804a80fb4d Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 20:35:48 +0000 Subject: [PATCH 01/14] shared_utils to calitp_data_analysis for bus_service_increase/ --- .../A3_service_increase_estimator.ipynb | 4 +- .../C1_transit_near_highways.ipynb | 53 +++++-------------- .../C3_debug_notinshapes.ipynb | 6 +-- bus_service_increase/C4_select_routes.ipynb | 10 ++-- .../C7_target_highway_corridors.ipynb | 4 +- .../D1_setup_parallel_trips_with_stops.py | 21 ++++---- bus_service_increase/D4_make_gmaps_results.py | 18 +++---- .../D5_make_stripplot_data.py | 19 +++---- bus_service_increase/E1_get_buses_on_shn.py | 2 +- .../E2_aggregated_route_stats.py | 9 ++-- .../E3_calculate_speeds_all_operators.py | 4 +- .../E4_highway_segments_stats.py | 17 +++--- .../E5_highway_processed_data_for_reports.py | 6 +-- .../bus_service_utils/better_bus_utils.py | 2 +- .../calenviroscreen_lehd_utils.py | 16 +++--- .../create_parallel_corridors.py | 2 +- bus_service_increase/competitive-routes.ipynb | 3 +- bus_service_increase/create_analysis_data.py | 16 +++--- .../explore-aggregation-examples.ipynb | 6 +-- .../highways-existing-transit.ipynb | 4 +- .../highways-uncompetitive-routes.ipynb | 2 +- bus_service_increase/make_tract_viz.py | 6 +-- .../parallel_corridors_utils.py | 23 +++----- bus_service_increase/setup_corridors_stats.py | 5 +- .../setup_service_increase.py | 7 ++- bus_service_increase/setup_tract_charts.py | 4 +- .../transit-deserts-uncompetitive.ipynb | 4 +- bus_service_increase/transit-on-shn.ipynb | 6 +-- bus_service_increase/warehouse_queries.py | 23 ++++---- 29 files changed, 135 insertions(+), 167 deletions(-) diff --git a/bus_service_increase/A3_service_increase_estimator.ipynb b/bus_service_increase/A3_service_increase_estimator.ipynb index 6bc2fa5ab..956095ee1 100644 --- a/bus_service_increase/A3_service_increase_estimator.ipynb +++ b/bus_service_increase/A3_service_increase_estimator.ipynb @@ -20,8 +20,6 @@ "ix = pd.IndexSlice\n", "\n", "from utils import *\n", - "import shared_utils\n", - "\n", "from siuba import *" ] }, @@ -704,7 +702,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/bus_service_increase/C1_transit_near_highways.ipynb b/bus_service_increase/C1_transit_near_highways.ipynb index d0c842244..2c427fb53 100644 --- a/bus_service_increase/C1_transit_near_highways.ipynb +++ b/bus_service_increase/C1_transit_near_highways.ipynb @@ -27,17 +27,16 @@ ], "source": [ "import branca\n", + "import folium\n", "import geopandas as gpd\n", "import intake\n", - "import ipywidgets as widgets\n", "import pandas as pd\n", "\n", "from IPython.display import Markdown, HTML\n", "\n", "import setup_corridors_stats\n", "from create_parallel_corridors import IMG_PATH, DATA_PATH\n", - "from shared_utils import geography_utils\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "\n", "catalog = intake.open_catalog(\"./*.yml\")" ] @@ -145,53 +144,27 @@ " cp.CALITP_CATEGORY_BRIGHT_COLORS[0], #blue\n", " cp.CALITP_CATEGORY_BRIGHT_COLORS[1] # orange\n", " ],\n", - " )\n", - " \n", - " # Instead of using county centroid, calculate centroid from transit_df\n", - " # Otherwise, it's too zoomed out from where transit routes are\n", - " transit_centroid = (to_map\n", - " .to_crs(geography_utils.WGS84).geometry.centroid\n", - " .iloc[0]\n", - " )\n", - "\n", - " LAYERS_DICT = {\n", - " \"Highways\": {\"df\": hwy_df,\n", - " \"plot_col\": \"Route\",\n", - " \"popup_dict\": hwys_popup_dict, \n", - " \"tooltip_dict\": hwys_popup_dict,\n", - " \"colorscale\": hwys_color,\n", - " },\n", - " \"Transit Routes\": {\"df\": to_map,\n", - " \"plot_col\": \"parallel\",\n", - " \"popup_dict\": transit_popup_dict, \n", - " \"tooltip_dict\": transit_popup_dict,\n", - " \"colorscale\": colorscale,\n", - " },\n", - " }\n", + " ) \n", " \n", " LEGEND_URL = (\n", " \"https://github.com/cal-itp/data-analyses/raw/\"\n", " \"main/bus_service_increase/\"\n", " \"img/legend_intersecting_parallel.png\"\n", " )\n", - " \n", - " LEGEND_DICT = {\n", - " \"legend_url\": LEGEND_URL,\n", - " \"legend_bottom\": 85,\n", - " \"legend_left\": 5,\n", - " }\n", " \n", + " fig = hwy_df.explore(\n", + " \"Route\", tiles = \"CartoDB Positron\",\n", + " cmap = colorscale, tooltip = list(hwys_popup_dict.keys()),\n", + " name = \"Highways\",\n", + " )\n", " \n", - " fig = map_utils.make_folium_multiple_layers_map(\n", - " LAYERS_DICT,\n", - " fig_width = 700, fig_height = 700, \n", - " zoom=11, \n", - " centroid = [round(transit_centroid.y,2), \n", - " round(transit_centroid.x, 2)], \n", - " title=f\"Parallel vs Intersecting Lines for {to_map.itp_id.iloc[0]}\",\n", - " legend_dict = LEGEND_DICT\n", + " fig = to_map.explore(\"parallel\",\n", + " m=fig, cmap = colorscale, name=\"Transit Routes\",\n", + " tooltip = list(transit_popup_dict.keys())\n", " )\n", " \n", + " folium.LayerControl().add_to(fig)\n", + "\n", " display(fig)\n", " #fig.save(f\"{IMG_PATH}parallel_{operator_name}.html\")\n", " #print(f\"{operator_name} map saved\")" diff --git a/bus_service_increase/C3_debug_notinshapes.ipynb b/bus_service_increase/C3_debug_notinshapes.ipynb index afea21df8..fd1c66119 100644 --- a/bus_service_increase/C3_debug_notinshapes.ipynb +++ b/bus_service_increase/C3_debug_notinshapes.ipynb @@ -51,8 +51,8 @@ "\n", "import create_parallel_corridors\n", "from bus_service_utils import utils\n", - "from shared_utils import geography_utils\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import portfolio_utils\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "\n", "IMG_PATH = create_parallel_corridors.IMG_PATH\n", "DATA_PATH = create_parallel_corridors.DATA_PATH\n", @@ -553,7 +553,7 @@ " \"addl_service_hrs\", \"service_hours_annual\", \n", " \"addl_service_hrs_annual\"\n", " ]\n", - "a1 = geography_utils.aggregate_by_geography(service_increase,\n", + "a1 = portfolio_utils.aggregate_by_geography(service_increase,\n", " group_cols = [\"itp_id\", \"day_name\", \"tract_type\"],\n", " sum_cols = sum_cols,\n", " )" diff --git a/bus_service_increase/C4_select_routes.ipynb b/bus_service_increase/C4_select_routes.ipynb index d468fb7d8..13bbfa05a 100644 --- a/bus_service_increase/C4_select_routes.ipynb +++ b/bus_service_increase/C4_select_routes.ipynb @@ -34,8 +34,8 @@ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "\n", - "import shared_utils\n", - "from bus_service_utils import utils" + "from bus_service_utils import utils as bus_utils\n", + "from calitp_data_analysis import utils" ] }, { @@ -45,8 +45,8 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = shared_utils.utils.download_geoparquet(utils.GCS_FILE_PATH, \n", - " \"parallel_or_intersecting\")" + "gdf = utils.download_geoparquet(utils.GCS_FILE_PATH, \n", + " \"parallel_or_intersecting\")" ] }, { @@ -136,7 +136,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/bus_service_increase/C7_target_highway_corridors.ipynb b/bus_service_increase/C7_target_highway_corridors.ipynb index 971c9bb97..d6c0409a9 100644 --- a/bus_service_increase/C7_target_highway_corridors.ipynb +++ b/bus_service_increase/C7_target_highway_corridors.ipynb @@ -31,8 +31,8 @@ "\n", "import setup_corridors_stats\n", "from create_parallel_corridors import IMG_PATH, DATA_PATH\n", - "from shared_utils import geography_utils, styleguide\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import styleguide\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "\n", "alt.themes.register(\"calitp_theme\", styleguide.calitp_theme)\n", "\n", diff --git a/bus_service_increase/D1_setup_parallel_trips_with_stops.py b/bus_service_increase/D1_setup_parallel_trips_with_stops.py index ee1af4b18..cfb5ef321 100644 --- a/bus_service_increase/D1_setup_parallel_trips_with_stops.py +++ b/bus_service_increase/D1_setup_parallel_trips_with_stops.py @@ -15,16 +15,17 @@ os.environ["CALITP_BQ_MAX_BYTES"] = str(130_000_000_000) -import shared_utils -from bus_service_utils import utils +from shared_utils import gtfs_utils, rt_dates, rt_utils +from bus_service_utils import utils as bus_utils +from calitp_data_analysis import geography_utils, utils -ANALYSIS_DATE = shared_utils.rt_dates.PMAC["Q2_2022"] -COMPILED_CACHED = f"{shared_utils.rt_utils.GCS_FILE_PATH}compiled_cached_views/" +ANALYSIS_DATE = rt_dates.PMAC["Q2_2022"] +COMPILED_CACHED = f"{rt_utils.GCS_FILE_PATH}compiled_cached_views/" def grab_service_hours(selected_date: str, valid_trip_keys: list) -> pd.DataFrame: - daily_service_hours = shared_utils.gtfs_utils.get_trips( + daily_service_hours = gtfs_utils.get_trips( selected_date = selected_date, itp_id_list = None, # Keep more columns, route_id, shape_id, direction_id so the metrolink fix @@ -36,7 +37,7 @@ def grab_service_hours(selected_date: str, ) daily_service_hours.to_parquet( - f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet") + f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet") def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame: @@ -45,7 +46,7 @@ def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame: f"{COMPILED_CACHED}trips_{selected_date}.parquet") daily_service_hours = pd.read_parquet( - f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet") + f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet") df = dd.merge( trips, @@ -120,7 +121,7 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame, stop_times_for_trip, on = ["calitp_itp_id", "stop_id"], how = "inner" - ).to_crs(shared_utils.geography_utils.WGS84) + ).to_crs(geography_utils.WGS84) stop_times_with_geom2 = (stop_times_with_geom.drop( @@ -146,8 +147,8 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame, trips_with_stops = grab_stops_for_trip_selected(one_trip, ANALYSIS_DATE) - shared_utils.utils.geoparquet_gcs_export( + utils.geoparquet_gcs_export( trips_with_stops, - utils.GCS_FILE_PATH, + bus_utils.GCS_FILE_PATH, f"trips_with_stops_{ANALYSIS_DATE}" ) \ No newline at end of file diff --git a/bus_service_increase/D4_make_gmaps_results.py b/bus_service_increase/D4_make_gmaps_results.py index f1ae75d70..992be5b97 100644 --- a/bus_service_increase/D4_make_gmaps_results.py +++ b/bus_service_increase/D4_make_gmaps_results.py @@ -12,8 +12,8 @@ from datetime import datetime from loguru import logger -import shared_utils -from bus_service_utils import utils +from calitp_data_analysis import geography_utils, utils +from bus_service_utils import utils as bus_utils from D1_setup_parallel_trips_with_stops import ANALYSIS_DATE, COMPILED_CACHED logger.add("./logs/make_gmaps_results.log") @@ -22,7 +22,7 @@ level="INFO") DATA_PATH = "./gmaps_cache/" -GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/" +GCS_FILE_PATH = f"{bus_utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/" def grab_cached_results(df: pd.DataFrame) -> (list, list): result_ids = list(df.identifier_num) @@ -32,7 +32,7 @@ def grab_cached_results(df: pd.DataFrame) -> (list, list): for i in result_ids: try: - json_dict = utils.open_request_json(i, + json_dict = bus_utils.open_request_json(i, data_path = DATA_PATH, gcs_file_path = GCS_FILE_PATH ) @@ -71,7 +71,7 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame: if __name__ == "__main__": time0 = datetime.now() - df = pd.read_parquet(f"{utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet") + df = pd.read_parquet(f"{bus_utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet") successful_ids, durations = grab_cached_results(df) logger.info("Grabbed cached results") @@ -106,11 +106,11 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame: how = "inner", # many on right because trip_ids can share same shape_id validate = "1:m" - ).to_crs(shared_utils.geography_utils.WGS84) + ).to_crs(geography_utils.WGS84) - shared_utils.utils.geoparquet_gcs_export(gdf, - utils.GCS_FILE_PATH, - f"gmaps_results_{ANALYSIS_DATE}") + utils.geoparquet_gcs_export(gdf, + bus_utils.GCS_FILE_PATH, + f"gmaps_results_{ANALYSIS_DATE}") end = datetime.now() logger.info(f"Total execution: {end - time0}") diff --git a/bus_service_increase/D5_make_stripplot_data.py b/bus_service_increase/D5_make_stripplot_data.py index 9146fa23a..09df6ea1e 100644 --- a/bus_service_increase/D5_make_stripplot_data.py +++ b/bus_service_increase/D5_make_stripplot_data.py @@ -13,10 +13,11 @@ from calitp_data_analysis.tables import tbls from siuba import * -import shared_utils import D2_setup_gmaps as setup_gmaps import E2_aggregated_route_stats as aggregated_route_stats -from bus_service_utils import utils +from bus_service_utils import utils as bus_utils +from calitp_data_analysis import utils +from shared_utils import portfolio_utils, rt_utils from D1_setup_parallel_trips_with_stops import (ANALYSIS_DATE, COMPILED_CACHED, merge_trips_with_service_hours) @@ -62,7 +63,7 @@ def add_trip_time_of_day(trips: pd.DataFrame) -> pd.DataFrame: # Add time-of-day df = df.assign( time_of_day = df.apply( - lambda x: shared_utils.rt_utils.categorize_time_of_day( + lambda x: rt_utils.categorize_time_of_day( x.trip_first_departure), axis=1) ) @@ -193,7 +194,7 @@ def add_route_group(df: gpd.GeoDataFrame, # Use agency_name from our views.gtfs_schedule.agency instead of Airtable? def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - agency_names = shared_utils.portfolio_utils.add_agency_name( + agency_names = portfolio_utils.add_agency_name( selected_date = ANALYSIS_DATE) df2 = pd.merge( @@ -209,7 +210,7 @@ def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: def merge_in_airtable(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: # Don't use name from Airtable. But, use district. - caltrans_districts = shared_utils.portfolio_utils.add_caltrans_district() + caltrans_districts = portfolio_utils.add_caltrans_district() # Airtable gives us fewer duplicates than doing tbl.gtfs_schedule.agency() # But naming should be done with tbl.gtfs_schedule.agency because that's what's used @@ -231,7 +232,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: under quarterly performance objective work. """ route_categories = (gpd.read_parquet( - f"{utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet") + f"{bus_utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet") .rename(columns = {"itp_id": "calitp_itp_id"}) ) @@ -244,7 +245,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: ) # Clean up route_name - route_names = shared_utils.portfolio_utils.add_route_name(ANALYSIS_DATE) + route_names = portfolio_utils.add_route_name(ANALYSIS_DATE) gdf3 = pd.merge( gdf2, @@ -330,7 +331,7 @@ def assemble_data(analysis_date: str, threshold: float = 1.5, gdf = assemble_data(ANALYSIS_DATE, threshold = 1.5, service_time_cutoffs = SERVICE_TIME_CUTOFFS) - shared_utils.utils.geoparquet_gcs_export( + utils.geoparquet_gcs_export( gdf, - utils.GCS_FILE_PATH, + bus_utils.GCS_FILE_PATH, f"competitive_route_variability_{ANALYSIS_DATE}") \ No newline at end of file diff --git a/bus_service_increase/E1_get_buses_on_shn.py b/bus_service_increase/E1_get_buses_on_shn.py index d7f18dc64..b325b1f7a 100644 --- a/bus_service_increase/E1_get_buses_on_shn.py +++ b/bus_service_increase/E1_get_buses_on_shn.py @@ -13,7 +13,7 @@ from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS from bus_service_utils import create_parallel_corridors, utils -from shared_utils import geography_utils, utils +from calitp_data_analysis import geography_utils, utils catalog = intake.open_catalog("./*.yml") diff --git a/bus_service_increase/E2_aggregated_route_stats.py b/bus_service_increase/E2_aggregated_route_stats.py index d86b9b431..36e37577f 100644 --- a/bus_service_increase/E2_aggregated_route_stats.py +++ b/bus_service_increase/E2_aggregated_route_stats.py @@ -18,9 +18,8 @@ import geopandas as gpd import pandas as pd -from shared_utils import (geography_utils, gtfs_utils, - rt_utils, portfolio_utils, utils - ) +from shared_utils import gtfs_utils, portfolio_utils, rt_utils +from calitp_data_analysis import utils from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS from bus_service_utils import gtfs_build @@ -254,7 +253,7 @@ def calculate_mean_speed_by_route(analysis_date: str, # Each trip is 1 observation, just take the average (not weighted) # to get route-level mean_speed_mph - mean_speed = geography_utils.aggregate_by_geography( + mean_speed = portfolio_utils.aggregate_by_geography( df, group_cols = group_cols, mean_cols = ["mean_speed_mph"] @@ -281,7 +280,7 @@ def get_competitive_routes() -> pd.DataFrame: "num_competitive", "pct_trips_competitive", ] - route_df = geography_utils.aggregate_by_geography( + route_df = portfolio_utils.aggregate_by_geography( trip_df, group_cols = route_level_cols, mean_cols = ["bus_multiplier", "bus_difference"], diff --git a/bus_service_increase/E3_calculate_speeds_all_operators.py b/bus_service_increase/E3_calculate_speeds_all_operators.py index 9a627e53b..1cc62e1cc 100644 --- a/bus_service_increase/E3_calculate_speeds_all_operators.py +++ b/bus_service_increase/E3_calculate_speeds_all_operators.py @@ -14,8 +14,8 @@ import os import pandas as pd -from shared_utils import (rt_utils, geography_utils, - gtfs_utils, utils) +from shared_utils import rt_utils, gtfs_utils +from calitp_data_analysis import geography_utils, utils from E0_bus_oppor_vars import ANALYSIS_DATE, COMPILED_CACHED_GCS from E2_aggregated_route_stats import ANALYSIS_MONTH_DAY diff --git a/bus_service_increase/E4_highway_segments_stats.py b/bus_service_increase/E4_highway_segments_stats.py index f2061304d..ffa5d0b46 100644 --- a/bus_service_increase/E4_highway_segments_stats.py +++ b/bus_service_increase/E4_highway_segments_stats.py @@ -7,7 +7,8 @@ import pandas as pd import E2_aggregated_route_stats as aggregated_route_stats -from shared_utils import geography_utils, utils +from calitp_data_analysis import geography_utils, utils +from shared_utils import portfolio_utils from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS catalog = intake.open_catalog("*.yml") @@ -64,7 +65,7 @@ def average_speed_by_stop(): # Drop observations with way too fast speeds speed_by_stops2 = speed_by_stops[speed_by_stops.speed_mph <= 65] - mean_speed = geography_utils.aggregate_by_geography( + mean_speed = portfolio_utils.aggregate_by_geography( speed_by_stops2, group_cols = ["calitp_itp_id", "stop_id"], mean_cols = ["speed_mph"], @@ -143,7 +144,7 @@ def calculate_trip_weighted_speed(gdf: gpd.GeoDataFrame, weighted_speed = gdf.mean_speed_mph * gdf.num_trips ) - segment_speed_agg = geography_utils.aggregate_by_geography( + segment_speed_agg = portfolio_utils.aggregate_by_geography( segment_speed, group_cols = group_cols, sum_cols = ["weighted_speed", "num_trips"] @@ -172,7 +173,7 @@ def aggregate_to_hwy_segment(df: pd.DataFrame, ] # These are stats we can easily sum up, to highway segment level - segment = geography_utils.aggregate_by_geography( + segment = portfolio_utils.aggregate_by_geography( df, group_cols = group_cols, sum_cols = sum_cols, @@ -181,12 +182,12 @@ def aggregate_to_hwy_segment(df: pd.DataFrame, # Attach the highway segment line geom back in other_hwy_cols = list(set(highway_cols).difference(set(["hwy_segment_id"]))) - segment_with_geom = geography_utils.attach_geometry( - segment, + segment_with_geom = pd.merge( highway_segment_gdf[group_cols + other_hwy_cols + ["geometry"]].drop_duplicates(), - merge_col = group_cols, - join = "inner" + segment, + on = group_cols, + how = "inner" ) # Clean up dtypes, re-order columns diff --git a/bus_service_increase/E5_highway_processed_data_for_reports.py b/bus_service_increase/E5_highway_processed_data_for_reports.py index 4914d2419..b2001d858 100644 --- a/bus_service_increase/E5_highway_processed_data_for_reports.py +++ b/bus_service_increase/E5_highway_processed_data_for_reports.py @@ -8,8 +8,8 @@ from E0_bus_oppor_vars import ANALYSIS_DATE, GCS_FILE_PATH from parallel_corridors_utils import PCT_COMPETITIVE_THRESHOLD - -from shared_utils import geography_utils, utils +from calitp_data_analysis import geography_utils, utils +from shared_utils import portfolio_utils catalog = intake.open_catalog("./*.yml") @@ -143,7 +143,7 @@ def get_pct_uncompetitive_by_highway_segment_id( Get only the highway segments where all of the transit that does intersect with that segment are uncompetitive. """ - a1 = geography_utils.aggregate_by_geography( + a1 = portfolio_utils.aggregate_by_geography( gdf, group_cols = ["hwy_segment_id"], count_cols = ["uncompetitive"], diff --git a/bus_service_increase/bus_service_utils/better_bus_utils.py b/bus_service_increase/bus_service_utils/better_bus_utils.py index 014e54288..113442ae9 100644 --- a/bus_service_increase/bus_service_utils/better_bus_utils.py +++ b/bus_service_increase/bus_service_utils/better_bus_utils.py @@ -24,7 +24,7 @@ from siuba import * from typing import Literal, Union -from shared_utils import geography_utils +from calitp_data_analysis import geography_utils from bus_service_utils import calenviroscreen_lehd_utils GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/bus_service_increase/" diff --git a/bus_service_increase/bus_service_utils/calenviroscreen_lehd_utils.py b/bus_service_increase/bus_service_utils/calenviroscreen_lehd_utils.py index 734b65d50..35e850b83 100644 --- a/bus_service_increase/bus_service_utils/calenviroscreen_lehd_utils.py +++ b/bus_service_increase/bus_service_utils/calenviroscreen_lehd_utils.py @@ -15,8 +15,8 @@ import numpy as np import pandas as pd -import shared_utils -from bus_service_utils import utils +from calitp_data_analysis import geography_utils, utils +from bus_service_utils import utils as bus_utils #--------------------------------------------------------# ### CalEnviroScreen functions @@ -58,7 +58,7 @@ def prep_calenviroscreen(df: gpd.GeoDataFrame, # Fix tract ID and calculate pop density df = (df.assign( Tract = df.Tract.apply(lambda x: '0' + str(x)[:-2]).astype(str), - sq_mi = df.geometry.area * shared_utils.geography_utils.SQ_MI_PER_SQ_M, + sq_mi = df.geometry.area * geography_utils.SQ_MI_PER_SQ_M, ).rename(columns = { "TotPop19": "Population", "ApproxLoc": "City", @@ -122,7 +122,7 @@ def download_lehd_data(download_date: str, utils.import_csv_export_parquet( dataset_name = f"{URBAN_URL}{download_date}{dataset}", output_file_name = dataset, - GCS_FILE_PATH = utils.GCS_FILE_PATH, + GCS_FILE_PATH = bus_utils.GCS_FILE_PATH, GCS=True ) @@ -213,7 +213,7 @@ def generate_calenviroscreen_lehd_data( ) -> gpd.GeoDataFrame: # CalEnviroScreen data (gdf) - CALENVIROSCREEN_PATH = f"{utils.GCS_FILE_PATH}calenviroscreen40shpf2021shp.zip" + CALENVIROSCREEN_PATH = f"{bus_utils.GCS_FILE_PATH}calenviroscreen40shpf2021shp.zip" with fsspec.open(CALENVIROSCREEN_PATH) as file: gdf = gpd.read_file(file) @@ -223,7 +223,7 @@ def generate_calenviroscreen_lehd_data( # LEHD Data lehd_dfs = {} for d in lehd_datasets: - lehd_dfs[d] = pd.read_parquet(f"{utils.GCS_FILE_PATH}{d}.parquet") + lehd_dfs[d] = pd.read_parquet(f"{bus_utils.GCS_FILE_PATH}{d}.parquet") cleaned_dfs = [] for key, value in lehd_dfs.items(): @@ -236,9 +236,9 @@ def generate_calenviroscreen_lehd_data( final = merge_calenviroscreen_lehd(gdf, lehd) if GCS: - shared_utils.utils.geoparquet_gcs_export( + utils.geoparquet_gcs_export( final, - utils.GCS_FILE_PATH, + bus_utils.GCS_FILE_PATH, "calenviroscreen_lehd_by_tract" ) diff --git a/bus_service_increase/bus_service_utils/create_parallel_corridors.py b/bus_service_increase/bus_service_utils/create_parallel_corridors.py index ffcfaffef..53e976d92 100644 --- a/bus_service_increase/bus_service_utils/create_parallel_corridors.py +++ b/bus_service_increase/bus_service_utils/create_parallel_corridors.py @@ -14,7 +14,7 @@ from typing import Literal -from shared_utils import geography_utils, utils +from calitp_data_analysis import geography_utils, utils DATA_PATH = "./data/" diff --git a/bus_service_increase/competitive-routes.ipynb b/bus_service_increase/competitive-routes.ipynb index 9e0ada7d0..a2c333ac3 100644 --- a/bus_service_increase/competitive-routes.ipynb +++ b/bus_service_increase/competitive-routes.ipynb @@ -22,7 +22,8 @@ "\n", "import parallel_corridors_utils\n", "import deploy_portfolio_yaml\n", - "from shared_utils import styleguide, portfolio_utils\n", + "from shared_utils import portfolio_utils\n", + "from calitp_data_analysis import styleguide\n", "\n", "catalog = intake.open_catalog(\"./*.yml\")\n", "alt.renderers.enable(\"html\")\n", diff --git a/bus_service_increase/create_analysis_data.py b/bus_service_increase/create_analysis_data.py index 18bf0ad44..81b4fed69 100644 --- a/bus_service_increase/create_analysis_data.py +++ b/bus_service_increase/create_analysis_data.py @@ -4,8 +4,8 @@ import numpy as np import pandas as pd -from bus_service_utils import utils -import shared_utils +from bus_service_utils import utils as bus_utils +from calitp_data_analysis import geography_utils, utils import warehouse_queries from calitp_data_analysis.tables import tbls @@ -23,7 +23,7 @@ def get_time_calculations(df: pd.DataFrame) -> pd.DataFrame: ## time calculations df = df.assign( date = pd.to_datetime(df.date), - departure_time = df.departure_time.dropna().apply(utils.fix_gtfs_time), + departure_time = df.departure_time.dropna().apply(bus_utils.fix_gtfs_time), ) # Something weird comes up trying to generate departure_dt @@ -267,7 +267,7 @@ def create_service_estimator_data(): # Categorize tracts and add back further process the operator-route-level df def generate_shape_categories(shapes_df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: shapes_df = (shapes_df.reset_index(drop=True) - .to_crs(shared_utils.geography_utils.CA_NAD83Albers) + .to_crs(geography_utils.CA_NAD83Albers) ) shapes_df = shapes_df.assign( @@ -276,7 +276,7 @@ def generate_shape_categories(shapes_df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: ## quick fix for invalid geometries? ces_df = (catalog.calenviroscreen_lehd_by_tract.read() - .to_crs(shared_utils.geography_utils.CA_NAD83Albers) + .to_crs(geography_utils.CA_NAD83Albers) ) ces_df = ces_df.assign( @@ -328,14 +328,14 @@ def create_shapes_tract_categorized(): print(f"Grab ITP IDs") print(itp_ids) - all_shapes = shared_utils.geography_utils.make_routes_shapefile( + all_shapes = geography_utils.make_routes_shapefile( ITP_ID_LIST = itp_ids) time1 = dt.datetime.now() print(f"Execution time to make routes shapefile: {time1-time0}") # Upload to GCS - shared_utils.utils.geoparquet_gcs_export(all_shapes, DATA_PATH, 'shapes_initial') + utils.geoparquet_gcs_export(all_shapes, DATA_PATH, 'shapes_initial') all_shapes = gpd.read_parquet(f"{DATA_PATH}shapes_initial.parquet") @@ -344,7 +344,7 @@ def create_shapes_tract_categorized(): processed_shapes = processed_shapes.apply(categorize_shape, axis=1) print(f"Execution time to categorize routes: {time2-time1}") - shared_utils.utils.geoparquet_gcs_export(processed_shapes, DATA_PATH, + utils.geoparquet_gcs_export(processed_shapes, DATA_PATH, 'shapes_processed') print(f"Total execution time: {time2-time0}") diff --git a/bus_service_increase/explore-aggregation-examples.ipynb b/bus_service_increase/explore-aggregation-examples.ipynb index 56ab0a9b4..f4bba1a4e 100644 --- a/bus_service_increase/explore-aggregation-examples.ipynb +++ b/bus_service_increase/explore-aggregation-examples.ipynb @@ -29,7 +29,7 @@ "import geopandas as gpd\n", "import pandas as pd\n", "\n", - "from shared_utils import geography_utils\n", + "from shared_utils import portfolio_utils\n", "\n", "DATA_PATH = \"./data/\"" ] @@ -92,7 +92,7 @@ "# % parallel = (# routes-hwy that are parallel / # routes-hwy combination)\n", "\n", "part1 = (\n", - " geography_utils.aggregate_by_geography(\n", + " portfolio_utils.aggregate_by_geography(\n", " gdf, \n", " group_cols = operator_group_cols,\n", " sum_cols = [\"parallel\"],\n", @@ -1308,7 +1308,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/bus_service_increase/highways-existing-transit.ipynb b/bus_service_increase/highways-existing-transit.ipynb index 6353f7876..131a577c5 100644 --- a/bus_service_increase/highways-existing-transit.ipynb +++ b/bus_service_increase/highways-existing-transit.ipynb @@ -31,8 +31,8 @@ "\n", "from IPython.display import Markdown, HTML\n", "\n", - "from shared_utils import geography_utils \n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import geography_utils \n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "from bus_service_utils import better_bus_utils\n", "\n", "catalog = intake.open_catalog(\"../bus_service_increase/*.yml\")\n", diff --git a/bus_service_increase/highways-uncompetitive-routes.ipynb b/bus_service_increase/highways-uncompetitive-routes.ipynb index 71e7d50a7..ea1f21345 100644 --- a/bus_service_increase/highways-uncompetitive-routes.ipynb +++ b/bus_service_increase/highways-uncompetitive-routes.ipynb @@ -54,7 +54,7 @@ "\n", "from IPython.display import Markdown, HTML\n", "\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "\n", "catalog = intake.open_catalog(\"./*.yml\")" ] diff --git a/bus_service_increase/make_tract_viz.py b/bus_service_increase/make_tract_viz.py index c95b276c0..376bf5aad 100644 --- a/bus_service_increase/make_tract_viz.py +++ b/bus_service_increase/make_tract_viz.py @@ -5,8 +5,8 @@ from bus_service_utils import utils from setup_tract_charts import * -from shared_utils import geography_utils -from shared_utils import calitp_color_palette as cp +from calitp_data_analysis import calitp_color_palette as cp +from shared_utils import portfolio_utils catalog = intake.open_catalog("./catalog.yml") @@ -77,7 +77,7 @@ def import_processed_data()-> pd.DataFrame: def aggregate_generate_stats(df: pd.DataFrame, group_cols: list) -> pd.DataFrame: # After subset - t1 = geography_utils.aggregate_by_geography( + t1 = portfolio_utils.aggregate_by_geography( df, group_cols = group_cols, sum_cols = ["stop_id", "itp_id", "Population", diff --git a/bus_service_increase/parallel_corridors_utils.py b/bus_service_increase/parallel_corridors_utils.py index c0558872b..813fc69d4 100644 --- a/bus_service_increase/parallel_corridors_utils.py +++ b/bus_service_increase/parallel_corridors_utils.py @@ -13,8 +13,8 @@ from IPython.display import Markdown, HTML, display_html from typing import Union -from shared_utils import calitp_color_palette as cp -from shared_utils import styleguide +from calitp_data_analysis import calitp_color_palette as cp +from calitp_data_analysis import styleguide from D1_setup_parallel_trips_with_stops import ANALYSIS_DATE alt.themes.register("calitp_theme", styleguide.calitp_theme) @@ -282,19 +282,12 @@ def make_map(gdf: gpd.GeoDataFrame): colors = cp.CALITP_CATEGORY_BOLD_COLORS, ) - m = map_utils.make_folium_choropleth_map( - gdf, - plot_col = PLOT_COL, - popup_dict = POPUP_DICT, tooltip_dict = POPUP_DICT, - colorscale = COLORSCALE, - fig_width = FIG_WIDTH, - fig_height = FIG_HEIGHT, - zoom = 10, - centroid = [gdf.geometry.centroid.y, - gdf.geometry.centroid.x, - ], - title = f"{gdf.hwy_route_name.iloc[0]}", - legend_name = None, + print(f"{gdf.hwy_route_name.iloc[0]}") + m = gdf.explore( + PLOT_COL, + tooltip = list(POPUP_DICT.keys()), + cmap = COLORSCALE, + legend = False ) return m diff --git a/bus_service_increase/setup_corridors_stats.py b/bus_service_increase/setup_corridors_stats.py index 30e0543f9..eb3612f64 100644 --- a/bus_service_increase/setup_corridors_stats.py +++ b/bus_service_increase/setup_corridors_stats.py @@ -10,7 +10,8 @@ import intake import pandas as pd -from shared_utils import geography_utils +from calitp_data_analysis import geography_utils +from shared_utils import portfolio_utils catalog = intake.open_catalog("./*.yml") @@ -112,7 +113,7 @@ def aggregate_operators(df: gpd.GeoDataFrame) -> pd.DataFrame: .reset_index() ) - df3 = geography_utils.aggregate_by_geography( + df3 = portfolio_utils.aggregate_by_geography( df2, group_cols = group_cols, sum_cols = ["parallel", "competitive"], diff --git a/bus_service_increase/setup_service_increase.py b/bus_service_increase/setup_service_increase.py index d82d7848d..cec33994a 100644 --- a/bus_service_increase/setup_service_increase.py +++ b/bus_service_increase/setup_service_increase.py @@ -5,8 +5,7 @@ from calitp_data_analysis.tables import tbls from siuba import * -from bus_service_utils import utils -import shared_utils +from bus_service_utils import utils as bus_utils import warehouse_queries # Use sub-folder for Jan 2022 @@ -136,7 +135,7 @@ def calculate_additional_trips_service_hours(df): def prep_ntd_metrics(): - df = (pd.read_csv(f"{utils.GCS_FILE_PATH}ntd_metrics_2019.csv") + df = (pd.read_csv(f"{bus_utils.GCS_FILE_PATH}ntd_metrics_2019.csv") >> filter(_.State == 'CA') ) @@ -164,7 +163,7 @@ def fix_vrh(value): def prep_ntd_vehicles(): - df = (pd.read_csv(f"{utils.GCS_FILE_PATH}ntd_vehicles_2019.csv") + df = (pd.read_csv(f"{bus_utils.GCS_FILE_PATH}ntd_vehicles_2019.csv") >> filter(_.State == 'CA') ) diff --git a/bus_service_increase/setup_tract_charts.py b/bus_service_increase/setup_tract_charts.py index 42786cc58..eaaf506a9 100644 --- a/bus_service_increase/setup_tract_charts.py +++ b/bus_service_increase/setup_tract_charts.py @@ -6,8 +6,8 @@ import shared_utils from bus_service_utils import utils, chart_utils -from shared_utils import styleguide, geography_utils -from shared_utils import calitp_color_palette as cp +from calitp_data_analysis import styleguide +from calitp_data_analysis import calitp_color_palette as cp # Set charting style guide alt.data_transformers.enable('default', max_rows=10_000) diff --git a/bus_service_increase/transit-deserts-uncompetitive.ipynb b/bus_service_increase/transit-deserts-uncompetitive.ipynb index 5df21fb07..0e344b14b 100644 --- a/bus_service_increase/transit-deserts-uncompetitive.ipynb +++ b/bus_service_increase/transit-deserts-uncompetitive.ipynb @@ -18,8 +18,8 @@ "\n", "from typing import Literal\n", "\n", - "from shared_utils import geography_utils\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import geography_utils\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "from bus_service_utils import better_bus_utils\n", "\n", "catalog = intake.open_catalog(\"../bus_service_increase/*.yml\")" diff --git a/bus_service_increase/transit-on-shn.ipynb b/bus_service_increase/transit-on-shn.ipynb index 339fde1bb..10f6781bb 100644 --- a/bus_service_increase/transit-on-shn.ipynb +++ b/bus_service_increase/transit-on-shn.ipynb @@ -20,7 +20,7 @@ "from IPython.display import Markdown, HTML\n", "from typing import Literal\n", "\n", - "from shared_utils import geography_utils, portfolio_utils\n", + "from shared_utils import portfolio_utils\n", "from bus_service_utils import better_bus_utils\n", "\n", "catalog = intake.open_catalog(\"../bus_service_increase/*.yml\")" @@ -221,7 +221,7 @@ " group_cols = ['Route', 'County', 'RouteType']\n", " \n", " if not gdf.mean_speed_mph_trip_weighted.isnull().all():\n", - " by_highway = geography_utils.aggregate_by_geography(\n", + " by_highway = portfolio_utils.aggregate_by_geography(\n", " gdf,\n", " group_cols,\n", " sum_cols = [\"trips_all_day_per_mi\", \"trips_peak_per_mi\", \n", @@ -231,7 +231,7 @@ " mean_cols = [\"mean_speed_mph_trip_weighted\"]\n", " )\n", " else:\n", - " by_highway = geography_utils.aggregate_by_geography(\n", + " by_highway = portfolio_utils.aggregate_by_geography(\n", " gdf,\n", " group_cols,\n", " sum_cols = [\"trips_all_day_per_mi\", \"trips_peak_per_mi\", \n", diff --git a/bus_service_increase/warehouse_queries.py b/bus_service_increase/warehouse_queries.py index bf2ed069b..c30bc01b3 100644 --- a/bus_service_increase/warehouse_queries.py +++ b/bus_service_increase/warehouse_queries.py @@ -8,8 +8,8 @@ from calitp_data_analysis.tables import tbls from siuba import * -from bus_service_utils import utils -import shared_utils +from bus_service_utils import utils as bus_utils +from shared_utils import gtfs_utils ''' Stash datasets in GCS to read in create_service_estimator.py @@ -22,10 +22,10 @@ service estimation and tract bus arrivals. Keep those in `utils.GCS_FILE_PATH`. -New sub-folders take form: f"{utils.GCS_FILE_PATH}SUBFOLDER_NAME/" +New sub-folders take form: f"{bus_utils.GCS_FILE_PATH}SUBFOLDER_NAME/" ''' -DATA_PATH = f"{utils.GCS_FILE_PATH}2022_Jan/" +DATA_PATH = f"{bus_utils.GCS_FILE_PATH}2022_Jan/" #---------------------------------------------------------------# # Set dates for analysis @@ -55,7 +55,7 @@ def grab_selected_trips_for_date(selected_date): "trip_key", "trip_id", "is_in_service" ] - trips = shared_utils.gtfs_utils.get_trips( + trips = gtfs_utils.get_trips( selected_date = selected_date, itp_id_list = None, trip_cols = keep_trip_cols, @@ -68,7 +68,7 @@ def grab_selected_trips_for_date(selected_date): "stop_sequence", "stop_id" ] - stop_times = shared_utils.gtfs_utils.get_stop_times( + stop_times = gtfs_utils.get_stop_times( selected_date = selected_date, itp_id_list = None, stop_time_cols = keep_stop_time_cols, @@ -121,7 +121,7 @@ def calculate_arrivals_at_stop(day_of_week: str = "thurs", trips_on_day = pd.read_parquet(f"{DATA_PATH}trips_joined_{day_of_week}.parquet") # this handles multiple url_feeds already, finds distinct in dim_stop_times - stop_times = shared_utils.gtfs_utils.get_stop_times( + stop_times = gtfs_utils.get_stop_times( selected_date = selected_date, itp_id_list = None, get_df = False, # return dask df @@ -140,11 +140,12 @@ def calculate_arrivals_at_stop(day_of_week: str = "thurs", ).compute() - daily_stop_times.to_parquet(f"{utils.GCS_FILE_PATH}daily_stop_times.parquet") + daily_stop_times.to_parquet(f"{bus_utils.GCS_FILE_PATH}daily_stop_times.parquet") def process_daily_stop_times(selected_date): - daily_stop_times = pd.read_parquet(f"{utils.GCS_FILE_PATH}daily_stop_times.parquet") + daily_stop_times = pd.read_parquet( + f"{bus_utils.GCS_FILE_PATH}daily_stop_times.parquet") # Handle some exclusions daily_stop_times = daily_stop_times[daily_stop_times.calitp_itp_id != 200] @@ -155,7 +156,7 @@ def process_daily_stop_times(selected_date): "stop_lon", "stop_lat", ] - stop_geom = shared_utils.gtfs_utils.get_stops( + stop_geom = gtfs_utils.get_stops( selected_date = selected_date, itp_id_list = None, stop_cols = keep_stop_cols, @@ -169,7 +170,7 @@ def process_daily_stop_times(selected_date): ) aggregated_stops_with_geom.to_parquet( - f"{utils.GCS_FILE_PATH}aggregated_stops_with_geom.parquet") + f"{bus_utils.GCS_FILE_PATH}aggregated_stops_with_geom.parquet") if __name__ == "__main__": From 31a7e83c74989b8b12b8212aee343ef37338ce42 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 20:43:58 +0000 Subject: [PATCH 02/14] shared_utils to calitp_data_analysis for facilities_services/ --- .../A5_compile_geocode_results.py | 21 ++++++++++--------- facilities_services/A7_manually_geocode.py | 18 ++++++++-------- .../A8_assemble_geocoded_results.py | 21 ++++++++++--------- .../A9_create_processed_data.py | 20 +++++++++--------- facilities_services/B1_layers_to_plot.py | 2 +- facilities_services/B2_chart_utils.py | 4 ++-- .../tier1-facilities-hqta.ipynb | 7 ++++--- 7 files changed, 48 insertions(+), 45 deletions(-) diff --git a/facilities_services/A5_compile_geocode_results.py b/facilities_services/A5_compile_geocode_results.py index 901a074c1..e16a4ea26 100644 --- a/facilities_services/A5_compile_geocode_results.py +++ b/facilities_services/A5_compile_geocode_results.py @@ -5,8 +5,8 @@ import pandas as pd import A4_geocode -import utils -import shared_utils +import utils as _utils +from calitp_data_analysis import geography_utils, utils # Parse the results dict and compile as pd.Series def compile_results(results: dict) -> pd.Series: @@ -33,10 +33,10 @@ def compile_results(results: dict) -> pd.Series: for i in unique_uuid: # Grab cached result - result = utils.open_request_json( + result = _utils.open_request_json( i, - DATA_PATH = utils.DATA_PATH, - GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}arcgis_geocode/" + DATA_PATH = _utils.DATA_PATH, + GCS_FILE_PATH = f"{_utils.GCS_FILE_PATH}arcgis_geocode/" ) # Compile JSON into pd.Series @@ -54,14 +54,15 @@ def compile_results(results: dict) -> pd.Series: # Export results to GCS print(f"# geocoded results: {len(full_results)}") - gdf = shared_utils.geography_utils.create_point_geometry( + gdf = geography_utils.create_point_geometry( full_results, longitude_col = "longitude", latitude_col = "latitude", ).drop(columns = ["longitude", "latitude"]) - shared_utils.utils.geoparquet_gcs_export(gdf, - utils.GCS_FILE_PATH, - "geocoder_results" - ) \ No newline at end of file + utils.geoparquet_gcs_export( + gdf, + _utils.GCS_FILE_PATH, + "geocoder_results" + ) \ No newline at end of file diff --git a/facilities_services/A7_manually_geocode.py b/facilities_services/A7_manually_geocode.py index 1118853d4..f85db1ed6 100644 --- a/facilities_services/A7_manually_geocode.py +++ b/facilities_services/A7_manually_geocode.py @@ -8,12 +8,12 @@ import geopandas as gpd import pandas as pd -import utils -import shared_utils +import utils as _utils +from calitp_data_analysis import geography_utils, utils # Basic cleaning of SHN postmiles dataset def clean_postmiles() -> gpd.GeoDataFrame: - df = gpd.read_parquet(f"{utils.DATA_PATH}shn_postmiles.parquet") + df = gpd.read_parquet(f"{_utils.DATA_PATH}shn_postmiles.parquet") # Round to 2 decimal places # otherwise, floats are giving trouble @@ -147,7 +147,7 @@ def add_lat_lon(row)-> pd.Series: lat_lon = df.apply(add_lat_lon, axis=1) df2 = pd.concat([df, lat_lon], axis=1) - gdf = shared_utils.geography_utils.create_point_geometry( + gdf = geography_utils.create_point_geometry( df2, longitude_col="longitude", latitude_col = "latitude" @@ -158,7 +158,7 @@ def add_lat_lon(row)-> pd.Series: if __name__ == "__main__": - df = pd.read_parquet(f"{utils.GCS_FILE_PATH}manual_geocoding.parquet") + df = pd.read_parquet(f"{_utils.GCS_FILE_PATH}manual_geocoding.parquet") df2 = subset_manual_geocoding(df) df3 = parse_postmiles(df2) @@ -182,8 +182,8 @@ def add_lat_lon(row)-> pd.Series: gdf = pd.concat([df5, manual_ones2], axis=0, ignore_index=True) # Export to GCS - shared_utils.utils.geoparquet_gcs_export(gdf, - utils.GCS_FILE_PATH, - "manually_geocoded_results" - ) + utils.geoparquet_gcs_export(gdf, + bus_utils.GCS_FILE_PATH, + "manually_geocoded_results" + ) \ No newline at end of file diff --git a/facilities_services/A8_assemble_geocoded_results.py b/facilities_services/A8_assemble_geocoded_results.py index 3fed27271..d5fff1e78 100644 --- a/facilities_services/A8_assemble_geocoded_results.py +++ b/facilities_services/A8_assemble_geocoded_results.py @@ -5,9 +5,9 @@ import intake import pandas as pd -import utils -import shared_utils +import utils as _utils import A3_prep_for_geocode +from calitp_data_analysis import utils catalog = intake.open_catalog("./*.yml") @@ -17,13 +17,13 @@ df = catalog.tier1_facilities_addresses.read() df = A3_prep_for_geocode.prep_for_geocoding(df) - geocoder_df = shared_utils.utils.download_geoparquet( - GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}", + geocoder_df = utils.download_geoparquet( + GCS_FILE_PATH = f"{_utils.GCS_FILE_PATH}", FILE_NAME = "geocoder_results" ) - manual_df = shared_utils.utils.download_geoparquet( - GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}", + manual_df = utils.download_geoparquet( + GCS_FILE_PATH = f"{_utils.GCS_FILE_PATH}", FILE_NAME = "manually_geocoded_results" ) @@ -75,7 +75,8 @@ print(f"# obs in final df: {len(final)}") # Export to GCS - shared_utils.utils.geoparquet_gcs_export(final, - utils.GCS_FILE_PATH, - "tier1_facilities_geocoded" - ) \ No newline at end of file + utils.geoparquet_gcs_export( + final, + _utils.GCS_FILE_PATH, + "tier1_facilities_geocoded" + ) \ No newline at end of file diff --git a/facilities_services/A9_create_processed_data.py b/facilities_services/A9_create_processed_data.py index 28288d947..0a51b10da 100644 --- a/facilities_services/A9_create_processed_data.py +++ b/facilities_services/A9_create_processed_data.py @@ -9,10 +9,9 @@ import intake import pandas as pd -from calitp import to_snakecase - -import utils -import shared_utils +import utils as _utils +from calitp_data_analysis import geography_utils, utils +from calitp_data_analysis.sql import to_snakecase catalog = intake.open_catalog("./*.yml") @@ -52,8 +51,8 @@ def sjoin_to_geography(df: gpd.GeoDataFrame, Join facilities (points) to some polygon geometry (county or district) ''' s1 = gpd.sjoin( - df.to_crs(shared_utils.geography_utils.WGS84), - geog_df.to_crs(shared_utils.geography_utils.WGS84), + df.to_crs(geography_utils.WGS84), + geog_df.to_crs(geography_utils.WGS84), how = "left", predicate = "intersects" ).drop(columns = "index_right") @@ -95,7 +94,8 @@ def sjoin_to_geography(df: gpd.GeoDataFrame, ).drop(columns = "district_orig") # Export to GCS - shared_utils.utils.geoparquet_gcs_export(gdf4, - utils.GCS_FILE_PATH, - "tier1_facilities_processed" - ) \ No newline at end of file + utils.geoparquet_gcs_export( + gdf4, + _utils.GCS_FILE_PATH, + "tier1_facilities_processed" + ) \ No newline at end of file diff --git a/facilities_services/B1_layers_to_plot.py b/facilities_services/B1_layers_to_plot.py index 9444580b7..f590422c9 100644 --- a/facilities_services/B1_layers_to_plot.py +++ b/facilities_services/B1_layers_to_plot.py @@ -7,7 +7,7 @@ import pandas as pd import utils -from shared_utils import geography_utils +from calitp_data_analysis import geography_utils catalog = intake.open_catalog("./*.yml") diff --git a/facilities_services/B2_chart_utils.py b/facilities_services/B2_chart_utils.py index dff132a4a..cdd21ff1b 100644 --- a/facilities_services/B2_chart_utils.py +++ b/facilities_services/B2_chart_utils.py @@ -6,8 +6,8 @@ import altair as alt import pandas as pd -from shared_utils import calitp_color_palette as cp -from shared_utils import styleguide +from calitp_data_analysis import calitp_color_palette as cp +from calitp_data_analysis import styleguide def make_donut_chart(df: pd.DataFrame, y_col: str, color_col: str) -> alt.Chart: diff --git a/facilities_services/tier1-facilities-hqta.ipynb b/facilities_services/tier1-facilities-hqta.ipynb index 642b4d905..c139e3a9a 100644 --- a/facilities_services/tier1-facilities-hqta.ipynb +++ b/facilities_services/tier1-facilities-hqta.ipynb @@ -29,8 +29,9 @@ "from IPython.display import Markdown, HTML\n", "\n", "import utils\n", - "from shared_utils import geography_utils, portfolio_utils, styleguide\n", - "from shared_utils import calitp_color_palette as cp\n", + "from shared_utils import portfolio_utils\n", + "from calitp_data_analysis import geography_utils, styleguide\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "import B1_layers_to_plot\n", "import B2_chart_utils as chart_utils\n", "\n", @@ -80,7 +81,7 @@ "source": [ "def aggregate_stats(df, group_cols):\n", "\n", - " df2 = geography_utils.aggregate_by_geography(\n", + " df2 = portfolio_utils.aggregate_by_geography(\n", " df, \n", " group_cols = group_cols,\n", " sum_cols = [\"sqft\"],\n", From c6eb3f7ce189742f4201053e1275d2b5b8568a62 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 20:49:04 +0000 Subject: [PATCH 03/14] shared_utils to calitp_data_analysis for gtfs_schedule/ --- .../03_speeds_service_calenviroscreen.ipynb | 17 ++++++++--------- gtfs_schedule/compare_trips_v1_v2.ipynb | 6 +++--- .../trips_service_hours_v1_v2_query.ipynb | 5 ++--- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/gtfs_schedule/03_speeds_service_calenviroscreen.ipynb b/gtfs_schedule/03_speeds_service_calenviroscreen.ipynb index f46a55162..7d650e367 100644 --- a/gtfs_schedule/03_speeds_service_calenviroscreen.ipynb +++ b/gtfs_schedule/03_speeds_service_calenviroscreen.ipynb @@ -41,10 +41,9 @@ "from calitp_data_analysis.tables import tbls\n", "from siuba import *\n", "\n", - "from shared_utils import geography_utils, rt_dates, rt_utils\n", - "from shared_utils import calitp_color_palette as cp\n", - "from shared_utils import styleguide\n", - "from segment_speed_utils import segment_calcs\n", + "from shared_utils import portfolio_utils, rt_dates, rt_utils, schedule_rt_utils\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "from calitp_data_analysis import geography_utils, styleguide\n", "\n", "GCS_FILE_PATH = \"gs://calitp-analytics-data/data-analyses/\"\n", "BUS_SERVICE_GCS = f\"{GCS_FILE_PATH}bus_service_increase/\"\n", @@ -162,7 +161,7 @@ " peak = df.time_of_day.apply(categorize_peak_off_peak)\n", " ) \n", " \n", - " by_route = geography_utils.aggregate_by_geography(\n", + " by_route = portfolio_utils.aggregate_by_geography(\n", " df,\n", " group_cols = [\"name\", \"source_record_id\", \n", " \"route_id\", \"route_short_name\", \n", @@ -225,8 +224,8 @@ " .compute()\n", " )\n", "\n", - " trip_start = segment_calcs.localize_vp_timestamp(\n", - " trip_start_time, \"min_time\"\n", + " trip_start = schedule_rt_utils.localize_vp_timestamp(\n", + " trip_start_time, [\"min_time\"]\n", " )\n", " \n", " trip_start = trip_start.assign(\n", @@ -290,7 +289,7 @@ "outputs": [], "source": [ "# Find average speed for peak vs off peak for each segment\n", - "avg_speeds_by_peak = geography_utils.aggregate_by_geography(\n", + "avg_speeds_by_peak = portfolio_utils.aggregate_by_geography(\n", " speeds_with_daytype.compute(),\n", " group_cols = [\"gtfs_dataset_key\", \"_gtfs_dataset_name\",\n", " \"route_dir_identifier\", \"segment_sequence\", \n", @@ -390,7 +389,7 @@ "metadata": {}, "outputs": [], "source": [ - "avg_speeds_by_equity = geography_utils.aggregate_by_geography(\n", + "avg_speeds_by_equity = portfolio_utils.aggregate_by_geography(\n", " segments_with_tract,\n", " group_cols = [\"equity_group\", \"peak\"],\n", " sum_cols = [\"trip_id_nunique\"],\n", diff --git a/gtfs_schedule/compare_trips_v1_v2.ipynb b/gtfs_schedule/compare_trips_v1_v2.ipynb index 4d8d91ea6..d1f73dd2f 100644 --- a/gtfs_schedule/compare_trips_v1_v2.ipynb +++ b/gtfs_schedule/compare_trips_v1_v2.ipynb @@ -53,7 +53,7 @@ "source": [ "import pandas as pd\n", "\n", - "from shared_utils import rt_dates, geography_utils\n", + "from shared_utils import rt_dates, portfolio_utils\n", "\n", "oct_date = rt_dates.DATES[\"oct2022\"]\n", "GCS_FILE_PATH = \"gs://calitp-analytics-data/data-analyses/gtfs_v1_v2_parity/\"" @@ -1546,7 +1546,7 @@ " Aggregate service hours and count unique shape_id, trip_id,\n", " and route_id for v1 and v2 for 1 operator.\n", " \"\"\"\n", - " v1_agg = geography_utils.aggregate_by_geography(\n", + " v1_agg = portfolio_utils.aggregate_by_geography(\n", " oct1[oct1.calitp_itp_id == itp_id],\n", " group_cols = [\"calitp_itp_id\", \"calitp_url_number\"],\n", " sum_cols = [\"service_hours\"],\n", @@ -1554,7 +1554,7 @@ " rename_cols = True\n", " )\n", " \n", - " v2_agg = geography_utils.aggregate_by_geography(\n", + " v2_agg = portfolio_utils.aggregate_by_geography(\n", " oct2[oct2.feed_key == feed_key],\n", " group_cols = [\"feed_key\"],\n", " sum_cols = [\"service_hours\"],\n", diff --git a/gtfs_schedule/trips_service_hours_v1_v2_query.ipynb b/gtfs_schedule/trips_service_hours_v1_v2_query.ipynb index 0c745b511..86f4573b5 100644 --- a/gtfs_schedule/trips_service_hours_v1_v2_query.ipynb +++ b/gtfs_schedule/trips_service_hours_v1_v2_query.ipynb @@ -36,11 +36,10 @@ "import sys\n", "\n", "from loguru import logger\n", - "from calitp.tables import tbls\n", + "from calitp_data_analysis.tables import tbls\n", "from siuba import *\n", "\n", - "import gtfs_utils_v2\n", - "from shared_utils import rt_dates" + "from shared_utils import gtfs_utils_v2, rt_dates" ] }, { From 897b8e6a4e61553d601e765824e5fe6994d18bcb Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 20:55:32 +0000 Subject: [PATCH 04/14] shared_utils to calitp_data_analysis for high_quality_transit_areas/ --- high_quality_transit_areas/A1_download_rail_ferry_brt_stops.py | 2 +- high_quality_transit_areas/A2_combine_stops.py | 2 +- high_quality_transit_areas/B1_create_hqta_segments.py | 3 ++- high_quality_transit_areas/B2_sjoin_stops_to_segments.py | 2 +- high_quality_transit_areas/C1_prep_pairwise_intersections.py | 2 +- high_quality_transit_areas/C2_get_intersections.py | 2 +- high_quality_transit_areas/C3_create_bus_hqta_types.py | 2 +- high_quality_transit_areas/D1_assemble_hqta_points.py | 3 ++- high_quality_transit_areas/D2_assemble_hqta_polygons.py | 2 +- high_quality_transit_areas/hqta-map.ipynb | 2 +- high_quality_transit_areas/hqta_green_SACOG.ipynb | 3 +-- high_quality_transit_areas/utilities.py | 2 +- 12 files changed, 14 insertions(+), 13 deletions(-) diff --git a/high_quality_transit_areas/A1_download_rail_ferry_brt_stops.py b/high_quality_transit_areas/A1_download_rail_ferry_brt_stops.py index 17a2d74f3..089bcf666 100644 --- a/high_quality_transit_areas/A1_download_rail_ferry_brt_stops.py +++ b/high_quality_transit_areas/A1_download_rail_ferry_brt_stops.py @@ -8,7 +8,7 @@ import geopandas as gpd import pandas as pd -from shared_utils import utils +from calitp_data_analysis import utils from update_vars import analysis_date, COMPILED_CACHED_VIEWS, TEMP_GCS diff --git a/high_quality_transit_areas/A2_combine_stops.py b/high_quality_transit_areas/A2_combine_stops.py index 603eb3c51..999be7ed9 100644 --- a/high_quality_transit_areas/A2_combine_stops.py +++ b/high_quality_transit_areas/A2_combine_stops.py @@ -14,7 +14,7 @@ from loguru import logger import A1_download_rail_ferry_brt_stops as rail_ferry_brt -from shared_utils import utils +from calitp_data_analysis import utils from utilities import GCS_FILE_PATH, clip_to_ca from update_vars import analysis_date, TEMP_GCS diff --git a/high_quality_transit_areas/B1_create_hqta_segments.py b/high_quality_transit_areas/B1_create_hqta_segments.py index 954f4cf3f..750e1072d 100644 --- a/high_quality_transit_areas/B1_create_hqta_segments.py +++ b/high_quality_transit_areas/B1_create_hqta_segments.py @@ -30,7 +30,8 @@ from dask import delayed, compute import operators_for_hqta -from shared_utils import utils, geography_utils, rt_utils +from calitp_data_analysis import geography_utils, utils +from shared_utils import rt_utils from segment_speed_utils import helpers, gtfs_schedule_wrangling from utilities import GCS_FILE_PATH from update_vars import analysis_date, COMPILED_CACHED_VIEWS diff --git a/high_quality_transit_areas/B2_sjoin_stops_to_segments.py b/high_quality_transit_areas/B2_sjoin_stops_to_segments.py index 0560f06bc..fc26ee836 100644 --- a/high_quality_transit_areas/B2_sjoin_stops_to_segments.py +++ b/high_quality_transit_areas/B2_sjoin_stops_to_segments.py @@ -16,7 +16,7 @@ from loguru import logger from typing import Union -from shared_utils import utils, gtfs_utils +from calitp_data_analysis import utils from utilities import GCS_FILE_PATH from update_vars import analysis_date, COMPILED_CACHED_VIEWS diff --git a/high_quality_transit_areas/C1_prep_pairwise_intersections.py b/high_quality_transit_areas/C1_prep_pairwise_intersections.py index c323e4f8e..fa6a47842 100644 --- a/high_quality_transit_areas/C1_prep_pairwise_intersections.py +++ b/high_quality_transit_areas/C1_prep_pairwise_intersections.py @@ -16,7 +16,7 @@ from loguru import logger -from shared_utils import utils +from calitp_data_analysis import utils from utilities import catalog_filepath, GCS_FILE_PATH from update_vars import analysis_date diff --git a/high_quality_transit_areas/C2_get_intersections.py b/high_quality_transit_areas/C2_get_intersections.py index 24dd68c08..9bf66d4a8 100644 --- a/high_quality_transit_areas/C2_get_intersections.py +++ b/high_quality_transit_areas/C2_get_intersections.py @@ -18,7 +18,7 @@ from loguru import logger -from shared_utils import utils +from calitp_data_analysis import utils from utilities import catalog_filepath, GCS_FILE_PATH from update_vars import analysis_date diff --git a/high_quality_transit_areas/C3_create_bus_hqta_types.py b/high_quality_transit_areas/C3_create_bus_hqta_types.py index ff28c7161..160153be5 100644 --- a/high_quality_transit_areas/C3_create_bus_hqta_types.py +++ b/high_quality_transit_areas/C3_create_bus_hqta_types.py @@ -17,7 +17,7 @@ from loguru import logger import C1_prep_pairwise_intersections as prep_clip -from shared_utils import utils +from calitp_data_analysis import utils from utilities import catalog_filepath, GCS_FILE_PATH from update_vars import analysis_date, COMPILED_CACHED_VIEWS diff --git a/high_quality_transit_areas/D1_assemble_hqta_points.py b/high_quality_transit_areas/D1_assemble_hqta_points.py index 67c34ba00..bf0a181a8 100644 --- a/high_quality_transit_areas/D1_assemble_hqta_points.py +++ b/high_quality_transit_areas/D1_assemble_hqta_points.py @@ -19,7 +19,8 @@ import A3_rail_ferry_brt_extract as rail_ferry_brt_extract import utilities -from shared_utils import utils, geography_utils, schedule_rt_utils +from calitp_data_analysis import geography_utils, utils +from shared_utils import schedule_rt_utils from update_vars import analysis_date, TEMP_GCS, COMPILED_CACHED_VIEWS EXPORT_PATH = f"{utilities.GCS_FILE_PATH}export/{analysis_date}/" diff --git a/high_quality_transit_areas/D2_assemble_hqta_polygons.py b/high_quality_transit_areas/D2_assemble_hqta_polygons.py index 29a6f9d02..f8b38f5b4 100644 --- a/high_quality_transit_areas/D2_assemble_hqta_polygons.py +++ b/high_quality_transit_areas/D2_assemble_hqta_polygons.py @@ -16,7 +16,7 @@ import C1_prep_pairwise_intersections as prep_clip import D1_assemble_hqta_points as assemble_hqta_points import utilities -from shared_utils import utils, geography_utils +from calitp_data_analysis import utils, geography_utils from D1_assemble_hqta_points import (EXPORT_PATH, add_route_info) from update_vars import analysis_date diff --git a/high_quality_transit_areas/hqta-map.ipynb b/high_quality_transit_areas/hqta-map.ipynb index 7aa99c545..5d15ec7e0 100644 --- a/high_quality_transit_areas/hqta-map.ipynb +++ b/high_quality_transit_areas/hqta-map.ipynb @@ -32,7 +32,7 @@ "import intake\n", "import pandas as pd\n", "\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "from shared_utils import portfolio_utils\n", "\n", "GCS_FILE_PATH = (\"gs://calitp-analytics-data/data-analyses/\"\n", diff --git a/high_quality_transit_areas/hqta_green_SACOG.ipynb b/high_quality_transit_areas/hqta_green_SACOG.ipynb index 037187630..5bf79de39 100644 --- a/high_quality_transit_areas/hqta_green_SACOG.ipynb +++ b/high_quality_transit_areas/hqta_green_SACOG.ipynb @@ -25,8 +25,7 @@ "import geopandas as gpd\n", "\n", "from utilities import GCS_FILE_PATH\n", - "from shared_utils import geography_utils \n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "\n", "catalog = intake.open_catalog(\"./*.yml\")" ] diff --git a/high_quality_transit_areas/utilities.py b/high_quality_transit_areas/utilities.py index 3adb14cd0..172f13527 100644 --- a/high_quality_transit_areas/utilities.py +++ b/high_quality_transit_areas/utilities.py @@ -7,7 +7,7 @@ import shapely from typing import Union -from shared_utils import calitp_color_palette +from calitp_data_analysis import calitp_color_palette fs = gcsfs.GCSFileSystem() From e56c695ce9b6bf1c054344a4d52be52f15a841e8 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 20:57:51 +0000 Subject: [PATCH 05/14] shared_utils to calitp_data_analysis for la_metro_demo/ --- la_metro_demo/A2_clean_up_gtfs.py | 5 +++-- la_metro_demo/A3_assemble_data.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/la_metro_demo/A2_clean_up_gtfs.py b/la_metro_demo/A2_clean_up_gtfs.py index 06422694b..80a8355ab 100644 --- a/la_metro_demo/A2_clean_up_gtfs.py +++ b/la_metro_demo/A2_clean_up_gtfs.py @@ -3,7 +3,8 @@ import geopandas as gpd import pandas as pd -from shared_utils import geography_utils, rt_dates +from shared_utils import portfolio_utils, rt_dates +from calitp_data_analysis import geography_utils from segment_speed_utils.project_vars import COMPILED_CACHED_VIEWS # LA Metro data is for Oct 2022, so let's use the date we already downloaded @@ -191,7 +192,7 @@ def assemble_route_level_data(): # Merge in number of trips, but don't assign values to the rows we # duplicated. We don't want n_trips to be overinflated - trips_by_route = geography_utils.aggregate_by_geography( + trips_by_route = portfolio_utils.aggregate_by_geography( trips, group_cols = ["name"] + route_cols, nunique_cols = ["trip_id"], diff --git a/la_metro_demo/A3_assemble_data.py b/la_metro_demo/A3_assemble_data.py index c4b710575..f17682050 100644 --- a/la_metro_demo/A3_assemble_data.py +++ b/la_metro_demo/A3_assemble_data.py @@ -3,7 +3,8 @@ import geopandas as gpd import pandas as pd -from shared_utils import geography_utils, rt_dates +from calitp_data_analysis import geography_utils +from shared_utils import portfolio_utils, rt_dates from bus_service_utils import calenviroscreen_lehd_utils from segment_speed_utils.project_vars import SEGMENT_GCS, COMPILED_CACHED_VIEWS @@ -48,7 +49,7 @@ def aggregate_overlay_intersect_by_equity( "route_length"] equity_cols = ["overall_ptile_group"] - by_route_equity = geography_utils.aggregate_by_geography( + by_route_equity = portfolio_utils.aggregate_by_geography( gdf, group_cols = route_cols + equity_cols, sum_cols = ["intersect_length"], From 5ee01552dd1d1636c177801ffca50e5b9eb0a21d Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 21:17:29 +0000 Subject: [PATCH 06/14] shared_utils to calitp_data_analysis for msd_dashboard_metric/ --- .../01_area_population_metrics.ipynb | 44 +++++++++---------- .../02_coverage_mapping.ipynb | 5 +-- .../03_accessibility_feeds.ipynb | 6 +-- msd_dashboard_metric/06_summary_metrics.ipynb | 25 +++++------ msd_dashboard_metric/07_fares_v2.ipynb | 9 ++-- msd_dashboard_metric/create_coverage_data.py | 38 ++++++++-------- msd_dashboard_metric/setup_charts.py | 4 +- msd_dashboard_metric/utils.py | 14 +++--- 8 files changed, 68 insertions(+), 77 deletions(-) diff --git a/msd_dashboard_metric/01_area_population_metrics.ipynb b/msd_dashboard_metric/01_area_population_metrics.ipynb index cffb321a4..faaccf93a 100644 --- a/msd_dashboard_metric/01_area_population_metrics.ipynb +++ b/msd_dashboard_metric/01_area_population_metrics.ipynb @@ -22,11 +22,10 @@ "\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(100_000_000_000)\n", "\n", - "from calitp.tables import tbl\n", + "from calitp_data_analysis.tables import tbls\n", "from siuba import *\n", "from IPython.display import Markdown\n", "\n", - "import shared_utils\n", "from utils import *" ] }, @@ -40,22 +39,20 @@ "#create_coverage_data.save_initial_data()\n", "\n", "# Read in data from queries\n", - "ca_block_joined = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n", - " 'block_population_joined')\n", + "ca_block_joined = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}block_population_joined.parquet\")\n", "rt_complete = pd.read_parquet(f\"{GCS_FILE_PATH}rt_complete.parquet\")\n", - "all_stops = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n", - " 'all_stops')\n", - "accessible_stops_trips = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n", - " 'accessible_stops_trips')\n", + "all_stops = gpd.read_parquet(f\"{GCS_FILE_PATH}all_stops.parquet\")\n", + "accessible_stops_trips = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}accessible_stops_trips.parquet\")\n", "\n", "# Read in employment data by tract\n", "#tract_pop_employ_filtered = create_coverage_data.get_employment_tract_data()\n", "#shared_utils.utils.geoparquet_gcs_export(tract_pop_employ_filtered, \n", "# GCS_FILE_PATH, 'tract_pop_employ_filtered')\n", "\n", - "tract_pop_employ_filtered = shared_utils.utils.download_geoparquet(GCS_FILE_PATH,\n", - " 'tract_pop_employ_filtered'\n", - " )" + "tract_pop_employ_filtered = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}tract_pop_employ_filtered.parquet\")" ] }, { @@ -98,8 +95,7 @@ "\n", "for key, value in rename_block_files.items():\n", " print(key)\n", - " sjoin_blocks[key] = shared_utils.utils.download_geoparquet(\n", - " GCS_FILE_PATH, value)" + " sjoin_blocks[key] = gpd.read_parquet(f\"{GCS_FILE_PATH}{value}.parquet\")" ] }, { @@ -256,8 +252,8 @@ } ], "source": [ - "block_level_static = shared_utils.utils.download_geoparquet(\n", - " GCS_FILE_PATH, \"block_level_static\")\n", + "block_level_static = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}block_level_static.parquet\")\n", "\n", "display(Markdown(f\"### All Stops Static\"))\n", "\n", @@ -541,8 +537,8 @@ "\n", "for t in tract_files:\n", " print(t)\n", - " sjoin_tracts[t] = shared_utils.utils.download_geoparquet(\n", - " GCS_FILE_PATH, t)" + " sjoin_tracts[t] = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}{t}.parquet\")" ] }, { @@ -596,8 +592,8 @@ "source": [ "def make_coverage_summary():\n", " \n", - " tract_df = shared_utils.utils.download_geoparquet(\n", - " GCS_FILE_PATH, \"tract_all_stops\")\n", + " tract_df = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}tract_all_stops.parquet\")\n", " \n", " ## since employment data is tract-level, only includes tracts < 4 sq km (~60% of jobs)\n", " employment_summary = (tract_df\n", @@ -610,11 +606,11 @@ "\n", " SQ_MI_PER_SQ_M = 3.86e-7\n", " \n", - " block_level_static = shared_utils.utils.download_geoparquet(\n", - " GCS_FILE_PATH, \"block_level_static\")\n", + " block_level_static = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}block_level_static.parquet\")\n", " \n", - " block_level_accessible = shared_utils.utils.download_geoparquet(\n", - " GCS_FILE_PATH, \"block_level_accessible\")\n", + " block_level_accessible = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}block_level_accessible.parquet\")\n", " \n", " coverage_summary = (block_level_static\n", " >> group_by(_.calitp_itp_id)\n", @@ -688,7 +684,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/msd_dashboard_metric/02_coverage_mapping.ipynb b/msd_dashboard_metric/02_coverage_mapping.ipynb index 991cfd7d5..6f43e76ed 100644 --- a/msd_dashboard_metric/02_coverage_mapping.ipynb +++ b/msd_dashboard_metric/02_coverage_mapping.ipynb @@ -22,8 +22,7 @@ "\n", "from siuba import *\n", "\n", - "from shared_utils import map_utils\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "from utils import *" ] }, @@ -515,7 +514,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/msd_dashboard_metric/03_accessibility_feeds.ipynb b/msd_dashboard_metric/03_accessibility_feeds.ipynb index 2d8dc192f..6e41d4365 100644 --- a/msd_dashboard_metric/03_accessibility_feeds.ipynb +++ b/msd_dashboard_metric/03_accessibility_feeds.ipynb @@ -44,8 +44,8 @@ "import warehouse_queries\n", "import create_accessibility_data\n", "import setup_charts\n", - "from shared_utils import styleguide\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import styleguide\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", "\n", "alt.themes.register(\"calitp_theme\", styleguide.calitp_theme)\n", "# enable\n", @@ -508,7 +508,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/msd_dashboard_metric/06_summary_metrics.ipynb b/msd_dashboard_metric/06_summary_metrics.ipynb index e7f819cc9..60a1811cd 100644 --- a/msd_dashboard_metric/06_summary_metrics.ipynb +++ b/msd_dashboard_metric/06_summary_metrics.ipynb @@ -20,8 +20,6 @@ "import geopandas as gpd\n", "\n", "from siuba import *\n", - "\n", - "import shared_utils\n", "from utils import *" ] }, @@ -32,11 +30,11 @@ "metadata": {}, "outputs": [], "source": [ - "ca_block_joined = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n", - " 'block_population_joined')\n", - "tract_pop_employ_filtered = shared_utils.utils.download_geoparquet(GCS_FILE_PATH,\n", - " 'tract_pop_employ_filtered'\n", - " )" + "ca_block_joined = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}block_population_joined.parquet\")\n", + "\n", + "tract_pop_employ_filtered = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}tract_pop_employ_filtered.parquet\")" ] }, { @@ -67,12 +65,11 @@ "\n", "for key, value in rename_block_files.items():\n", " print(key)\n", - " sjoin_blocks[key] = shared_utils.utils.download_geoparquet(\n", - " GCS_FILE_PATH, value)\n", + " sjoin_blocks[key] = gpd.read_parquet(f\"{GCS_FILE_PATH}{value}.parquet\")\n", "\n", "# This one needs to be read in as df, in a dict, kernel will crash\n", - "block_level_static = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n", - " \"block_level_static\")" + "block_level_static = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}block_level_static.parquet\")" ] }, { @@ -101,8 +98,8 @@ "\n", "for t in tract_files:\n", " print(t)\n", - " sjoin_tracts[t] = shared_utils.utils.download_geoparquet(\n", - " GCS_FILE_PATH, t)" + " sjoin_tracts[t] = gpd.read_parquet(\n", + " f\"{GCS_FILE_PATH}{t}.parquet\")" ] }, { @@ -282,7 +279,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/msd_dashboard_metric/07_fares_v2.ipynb b/msd_dashboard_metric/07_fares_v2.ipynb index 4a1f7b4f0..60f47794c 100644 --- a/msd_dashboard_metric/07_fares_v2.ipynb +++ b/msd_dashboard_metric/07_fares_v2.ipynb @@ -42,8 +42,9 @@ "import create_accessibility_data\n", "import setup_charts\n", "import utils\n", - "from shared_utils import geography_utils, styleguide\n", - "from shared_utils import calitp_color_palette as cp\n", + "from calitp_data_analysis import styleguide\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "from shared_utils import portfolio_utils\n", "\n", "display(Markdown(\n", " f\"Report updated / data available through: \"\n", @@ -69,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "feeds_by_date = (geography_utils.aggregate_by_geography(\n", + "feeds_by_date = (portfolio_utils.aggregate_by_geography(\n", " fares_feeds,\n", " group_cols = [\"date\"],\n", " count_cols = [\"feed_key\"]\n", @@ -200,7 +201,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/msd_dashboard_metric/create_coverage_data.py b/msd_dashboard_metric/create_coverage_data.py index b3da4b1e6..e4fdcad65 100644 --- a/msd_dashboard_metric/create_coverage_data.py +++ b/msd_dashboard_metric/create_coverage_data.py @@ -6,12 +6,12 @@ os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000) -from calitp.tables import tbl +from calitp_data_analysis.tables import tbls from calitp_data_analysis.sql import query_sql from siuba import * -import utils -import shared_utils +import utils as _utils +from calitp_data_analysis import geography_utils, utils catalog = intake.open_catalog("./catalog.yml") @@ -41,7 +41,7 @@ def get_employment_tract_data(): ) tract_pop_employ = tract_pop_employ.to_crs( - shared_utils.geography_utils.CA_NAD83Albers) + geography_utils.CA_NAD83Albers) tract_pop_employ['area'] = tract_pop_employ.geometry.area @@ -70,18 +70,18 @@ def get_employment_tract_data(): def save_initial_data(): - ca_block_joined = utils.get_ca_block_geo() - shared_utils.utils.geoparquet_gcs_export(ca_block_joined, utils.GCS_FILE_PATH, + ca_block_joined = _utils.get_ca_block_geo() + utils.geoparquet_gcs_export(ca_block_joined, _utils.GCS_FILE_PATH, 'block_population_joined') - all_stops = utils.get_stops_and_trips(filter_accessible = False) + all_stops = _utils.get_stops_and_trips(filter_accessible = False) all_stops = all_stops.apply(buffer_by_route_type, axis=1) - shared_utils.utils.geoparquet_gcs_export(all_stops, utils.GCS_FILE_PATH, + utils.geoparquet_gcs_export(all_stops, _utils.GCS_FILE_PATH, 'all_stops') - accessible_stops_trips = utils.get_stops_and_trips(filter_accessible = True) + accessible_stops_trips = _utils.get_stops_and_trips(filter_accessible = True) accessible_stops_trips = accessible_stops_trips.apply(buffer_by_route_type, axis=1) - shared_utils.utils.geoparquet_gcs_export(accessible_stops_trips, utils.GCS_FILE_PATH, + utils.geoparquet_gcs_export(accessible_stops_trips, _utils.GCS_FILE_PATH, 'accessible_stops_trips') @@ -107,7 +107,7 @@ def save_initial_data(): _.calitp_url_number == _.url_number) ) - rt_complete.to_parquet(f'{utils.GCS_FILE_PATH}rt_complete.parquet') + rt_complete.to_parquet(f'{_utils.GCS_FILE_PATH}rt_complete.parquet') @@ -223,12 +223,12 @@ def spatial_joins_to_blocks_and_tracts(): Return 2 dictionaries of results. ''' # Read in parquets from above - ca_block_joined = shared_utils.utils.download_geoparquet( - utils.GCS_FILE_PATH, 'block_population_joined') - all_stops = shared_utils.utils.download_geoparquet(utils.GCS_FILE_PATH, 'all_stops') - accessible_stops_trips = shared_utils.utils.download_geoparquet( - utils.GCS_FILE_PATH, 'accessible_stops_trips') - rt_complete = pd.read_parquet(f"{utils.GCS_FILE_PATH}rt_complete.parquet") + ca_block_joined = gpd.read_parquet( + f"{_utils.GCS_FILE_PATH}block_population_joined.parquet") + all_stops = gpd.read_parquet(f"{_utils.GCS_FILE_PATH}all_stops.parquet") + accessible_stops_trips = gpd.read_parquet( + f"{_utils.GCS_FILE_PATH}accessible_stops_trips.parquet") + rt_complete = pd.read_parquet(f"{_utils.GCS_FILE_PATH}rt_complete.parquet") # Read in employment data by tract tract_pop_employ_filtered = get_employment_tract_data() @@ -265,8 +265,8 @@ def spatial_joins_to_blocks_and_tracts(): for key, value in sjoin_blocks.items(): print(key) new_name = rename_block_files[key] - shared_utils.utils.geoparquet_gcs_export(value, GCS_FILE_PATH, f"{new_name}") + utils.geoparquet_gcs_export(value, GCS_FILE_PATH, f"{new_name}") for key, value in sjoin_tracts.items(): print(key) - shared_utils.utils.geoparquet_gcs_export(value, GCS_FILE_PATH, f"{key}") + utils.geoparquet_gcs_export(value, GCS_FILE_PATH, f"{key}") diff --git a/msd_dashboard_metric/setup_charts.py b/msd_dashboard_metric/setup_charts.py index ed5ed3549..5b9d1717d 100644 --- a/msd_dashboard_metric/setup_charts.py +++ b/msd_dashboard_metric/setup_charts.py @@ -1,8 +1,8 @@ import altair as alt import pandas as pd -from shared_utils import styleguide -from shared_utils import calitp_color_palette as cp +from calitp_data_analysis import styleguide +from calitp_data_analysis import calitp_color_palette as cp AXIS_DATE_FORMAT ="%-m/%-d/%y" diff --git a/msd_dashboard_metric/utils.py b/msd_dashboard_metric/utils.py index 2895c2595..4da872ab7 100644 --- a/msd_dashboard_metric/utils.py +++ b/msd_dashboard_metric/utils.py @@ -3,10 +3,8 @@ import geopandas as gpd import datetime as dt -import shared_utils - -import calitp -from calitp.tables import tbl +from calitp_data_analysis.tables import tbls +from calitp_data_analysis import geography_utils from siuba import * import requests @@ -30,20 +28,20 @@ def get_ca_block_group_geo(): stanford_shorelines = catalog.stanford_shorelines.read() ca_shoreline = stanford_shorelines >> filter(_.STFIPS == '06') ca_block_geo = ca_block_geo.clip(ca_shoreline) - ca_block_geo = ca_block_geo.to_crs(shared_utils.geography_utils.CA_NAD83Albers) + ca_block_geo = ca_block_geo.to_crs(geography_utils.CA_NAD83Albers) return ca_block_geo # Use this one, move to TIGER file def get_ca_block_geo(): # Bring in block geometry - ca_blocks = gpd.read_parquet(f'{utils.GCS_FILE_PATH}2020_tiger_block_geo.parquet') + ca_blocks = gpd.read_parquet(f'{GCS_FILE_PATH}2020_tiger_block_geo.parquet') ca_blocks = (ca_blocks >> filter(_.ALAND20 > 10) ## remove water >> select(_.county == _.COUNTYFP20, _.tract == _.TRACTCE20, _.block == _.BLOCKCE20, _.geo_id == _.GEOID20, _.geometry)) - ca_blocks = ca_blocks.to_crs(shared_utils.geography_utils.CA_NAD83Albers) + ca_blocks = ca_blocks.to_crs(geography_utils.CA_NAD83Albers) # Bring in block population ca_block_pop = catalog.ca_block_population.read() @@ -105,7 +103,7 @@ def get_stops_and_trips(filter_accessible): geometry=gpd.points_from_xy(stops_trips.stop_lon, stops_trips.stop_lat), crs = 'EPSG:4326') - .to_crs(shared_utils.geography_utils.CA_NAD83Albers) + .to_crs(geography_utils.CA_NAD83Albers) ) return stops_trips From 5e117be39efb28da971d68b4bd6480d952668530 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 23:42:35 +0000 Subject: [PATCH 07/14] remove shared_utils references in one_hundred_recs/ --- one_hundred_recs/bb_d1.ipynb | 4 ---- one_hundred_recs/bb_d10.ipynb | 4 ---- one_hundred_recs/bb_d11.ipynb | 4 ---- one_hundred_recs/bb_d12.ipynb | 3 --- one_hundred_recs/bb_d2.ipynb | 4 ---- one_hundred_recs/bb_d3.ipynb | 4 ---- one_hundred_recs/bb_d4.ipynb | 4 ---- one_hundred_recs/bb_d5.ipynb | 4 ---- one_hundred_recs/bb_d6.ipynb | 4 ---- one_hundred_recs/bb_d7.ipynb | 4 ---- one_hundred_recs/bb_d8.ipynb | 4 ---- one_hundred_recs/bb_d9.ipynb | 4 ---- one_hundred_recs/bb_test_d8.ipynb | 4 ---- one_hundred_recs/export_hwy_data_geojson.py | 2 +- one_hundred_recs/major-route-improvements.ipynb | 4 ++-- one_hundred_recs/refined_corridors_hotspots.ipynb | 4 ---- 16 files changed, 3 insertions(+), 58 deletions(-) diff --git a/one_hundred_recs/bb_d1.ipynb b/one_hundred_recs/bb_d1.ipynb index a87049cb8..f8df4a222 100644 --- a/one_hundred_recs/bb_d1.ipynb +++ b/one_hundred_recs/bb_d1.ipynb @@ -9,15 +9,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d10.ipynb b/one_hundred_recs/bb_d10.ipynb index 4f73cf983..f094f06a1 100644 --- a/one_hundred_recs/bb_d10.ipynb +++ b/one_hundred_recs/bb_d10.ipynb @@ -18,15 +18,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d11.ipynb b/one_hundred_recs/bb_d11.ipynb index 0fa66399b..7019d235e 100644 --- a/one_hundred_recs/bb_d11.ipynb +++ b/one_hundred_recs/bb_d11.ipynb @@ -9,15 +9,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d12.ipynb b/one_hundred_recs/bb_d12.ipynb index 520fc773c..0286402b7 100644 --- a/one_hundred_recs/bb_d12.ipynb +++ b/one_hundred_recs/bb_d12.ipynb @@ -18,15 +18,12 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d2.ipynb b/one_hundred_recs/bb_d2.ipynb index d6e11dbeb..f04c9d3b0 100644 --- a/one_hundred_recs/bb_d2.ipynb +++ b/one_hundred_recs/bb_d2.ipynb @@ -17,15 +17,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d3.ipynb b/one_hundred_recs/bb_d3.ipynb index 31725c9a4..eef3524c7 100644 --- a/one_hundred_recs/bb_d3.ipynb +++ b/one_hundred_recs/bb_d3.ipynb @@ -9,15 +9,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d4.ipynb b/one_hundred_recs/bb_d4.ipynb index 6294cd04a..57f65603e 100644 --- a/one_hundred_recs/bb_d4.ipynb +++ b/one_hundred_recs/bb_d4.ipynb @@ -18,15 +18,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d5.ipynb b/one_hundred_recs/bb_d5.ipynb index 63a8f4206..84a6d84da 100644 --- a/one_hundred_recs/bb_d5.ipynb +++ b/one_hundred_recs/bb_d5.ipynb @@ -9,15 +9,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d6.ipynb b/one_hundred_recs/bb_d6.ipynb index 255c4be8c..42f8dbf44 100644 --- a/one_hundred_recs/bb_d6.ipynb +++ b/one_hundred_recs/bb_d6.ipynb @@ -18,15 +18,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d7.ipynb b/one_hundred_recs/bb_d7.ipynb index 11dc5971f..9af1ef5ae 100644 --- a/one_hundred_recs/bb_d7.ipynb +++ b/one_hundred_recs/bb_d7.ipynb @@ -9,15 +9,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d8.ipynb b/one_hundred_recs/bb_d8.ipynb index 771204480..1eec2f01e 100644 --- a/one_hundred_recs/bb_d8.ipynb +++ b/one_hundred_recs/bb_d8.ipynb @@ -9,15 +9,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_d9.ipynb b/one_hundred_recs/bb_d9.ipynb index 30c82c673..283b5f830 100644 --- a/one_hundred_recs/bb_d9.ipynb +++ b/one_hundred_recs/bb_d9.ipynb @@ -9,15 +9,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/bb_test_d8.ipynb b/one_hundred_recs/bb_test_d8.ipynb index 8fdbbc1ad..7f1ac087d 100644 --- a/one_hundred_recs/bb_test_d8.ipynb +++ b/one_hundred_recs/bb_test_d8.ipynb @@ -17,15 +17,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", diff --git a/one_hundred_recs/export_hwy_data_geojson.py b/one_hundred_recs/export_hwy_data_geojson.py index 795a92af4..7f4db7b74 100644 --- a/one_hundred_recs/export_hwy_data_geojson.py +++ b/one_hundred_recs/export_hwy_data_geojson.py @@ -11,7 +11,7 @@ import pandas as pd from bus_service_utils import better_bus_utils -from shared_utils import utils +from calitp_data_analysis import utils BUS_SERVICE_GCS = "gs://calitp-analytics-data/data-analyses/bus_service_increase/" RECS_GCS = "gs://calitp-analytics-data/data-analyses/one_hundred_recs/" diff --git a/one_hundred_recs/major-route-improvements.ipynb b/one_hundred_recs/major-route-improvements.ipynb index 4dca43b63..eff41a7ed 100644 --- a/one_hundred_recs/major-route-improvements.ipynb +++ b/one_hundred_recs/major-route-improvements.ipynb @@ -35,8 +35,8 @@ "\n", "from IPython.display import Markdown, HTML\n", "\n", - "from shared_utils import calitp_color_palette as cp\n", - "from shared_utils import geography_utils, portfolio_utils\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "from shared_utils import portfolio_utils\n", "from bus_service_utils import better_bus_utils\n", "\n", "catalog = intake.open_catalog(\"../bus_service_increase/*.yml\")\n", diff --git a/one_hundred_recs/refined_corridors_hotspots.ipynb b/one_hundred_recs/refined_corridors_hotspots.ipynb index 5feaccb02..d4bdb8755 100644 --- a/one_hundred_recs/refined_corridors_hotspots.ipynb +++ b/one_hundred_recs/refined_corridors_hotspots.ipynb @@ -9,15 +9,11 @@ "source": [ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", - "import shared_utils\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import folium \n", "import branca\n", - "import datetime as dt\n", "from siuba import *\n", - "from bus_service_utils import better_bus_utils\n", "\n", "from rt_analysis import rt_filter_map_plot\n", "\n", From c9b94277264e788756a76f33348c8391f2037a29 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 23:49:58 +0000 Subject: [PATCH 08/14] shared_utils to calitp_data_analysis for open_data/ --- open_data/concatenate_vehicle_positions.py | 3 ++- open_data/create_routes_data.py | 3 ++- open_data/create_stops_data.py | 3 ++- open_data/debug-amtrak.ipynb | 3 +-- open_data/download_shapes.py | 3 ++- open_data/download_stops.py | 3 ++- open_data/download_vehicle_positions.py | 3 ++- open_data/gcs_to_esri.py | 2 +- open_data/link_operator_to_county_district.py | 3 +-- open_data/prep_traffic_ops.py | 3 ++- open_data/supplement_meta.py | 2 +- 11 files changed, 18 insertions(+), 13 deletions(-) diff --git a/open_data/concatenate_vehicle_positions.py b/open_data/concatenate_vehicle_positions.py index cbd49df69..32db514ef 100644 --- a/open_data/concatenate_vehicle_positions.py +++ b/open_data/concatenate_vehicle_positions.py @@ -13,7 +13,8 @@ from dask import delayed from loguru import logger -from shared_utils import dask_utils, schedule_rt_utils, utils +from shared_utils import dask_utils, schedule_rt_utils +from calitp_data_analysis import utils from update_vars import SEGMENT_GCS, analysis_date fs = gcsfs.GCSFileSystem() diff --git a/open_data/create_routes_data.py b/open_data/create_routes_data.py index b3746eee4..460a8222d 100644 --- a/open_data/create_routes_data.py +++ b/open_data/create_routes_data.py @@ -11,7 +11,8 @@ from datetime import datetime import prep_traffic_ops -from shared_utils import utils, geography_utils, portfolio_utils +from calitp_data_analysis import utils, geography_utils +from shared_utils import portfolio_utils from segment_speed_utils import helpers from update_vars import analysis_date, TRAFFIC_OPS_GCS diff --git a/open_data/create_stops_data.py b/open_data/create_stops_data.py index 862a6ed6f..3c7776c77 100644 --- a/open_data/create_stops_data.py +++ b/open_data/create_stops_data.py @@ -12,7 +12,8 @@ from datetime import datetime import prep_traffic_ops -from shared_utils import utils, geography_utils, schedule_rt_utils +from calitp_data_analysis import utils, geography_utils +from shared_utils import schedule_rt_utils from segment_speed_utils import helpers from update_vars import analysis_date, TRAFFIC_OPS_GCS diff --git a/open_data/debug-amtrak.ipynb b/open_data/debug-amtrak.ipynb index 4a1391488..5dbb09d02 100644 --- a/open_data/debug-amtrak.ipynb +++ b/open_data/debug-amtrak.ipynb @@ -29,6 +29,7 @@ "import pandas as pd\n", "from segment_speed_utils import helpers\n", "from segment_speed_utils.project_vars import COMPILED_CACHED_VIEWS, analysis_date\n", + "from calitp_data_analysis import utils\n", "\n", "TRAFFIC_OPS_GCS = \"gs://calitp-analytics-data/data-analyses/traffic_ops/\"\n", "\n", @@ -396,8 +397,6 @@ "metadata": {}, "outputs": [], "source": [ - "from shared_utils import utils\n", - "\n", "utils.geoparquet_gcs_export(\n", " routes3, \n", " TRAFFIC_OPS_GCS, \n", diff --git a/open_data/download_shapes.py b/open_data/download_shapes.py index 3bc70d73f..8603a4a28 100644 --- a/open_data/download_shapes.py +++ b/open_data/download_shapes.py @@ -11,7 +11,8 @@ from loguru import logger from download_trips import get_operators -from shared_utils import gtfs_utils_v2, geography_utils, utils +from shared_utils import gtfs_utils_v2 +from calitp_data_analysis import geography_utils, utils from update_vars import analysis_date, COMPILED_CACHED_VIEWS if __name__ == "__main__": diff --git a/open_data/download_stops.py b/open_data/download_stops.py index 399a91309..90735535d 100644 --- a/open_data/download_stops.py +++ b/open_data/download_stops.py @@ -11,7 +11,8 @@ from loguru import logger from download_trips import get_operators -from shared_utils import gtfs_utils_v2, geography_utils, utils +from calitp_data_analysis import geography_utils, utils +from shared_utils import gtfs_utils_v2 from update_vars import analysis_date, COMPILED_CACHED_VIEWS if __name__ == "__main__": diff --git a/open_data/download_vehicle_positions.py b/open_data/download_vehicle_positions.py index 53099ffe3..78bcf4561 100644 --- a/open_data/download_vehicle_positions.py +++ b/open_data/download_vehicle_positions.py @@ -12,10 +12,11 @@ import sys from calitp_data_analysis.tables import tbls +from calitp_data_analysis import utils from loguru import logger from siuba import * -from shared_utils import utils, schedule_rt_utils +from shared_utils import schedule_rt_utils from update_vars import SEGMENT_GCS, analysis_date fs = gcsfs.GCSFileSystem() diff --git a/open_data/gcs_to_esri.py b/open_data/gcs_to_esri.py index a0c6b155b..aab511539 100644 --- a/open_data/gcs_to_esri.py +++ b/open_data/gcs_to_esri.py @@ -12,7 +12,7 @@ from loguru import logger -from shared_utils import utils, geography_utils +from calitp_data_analysis import utils, geography_utils from update_vars import analysis_date catalog = intake.open_catalog("./catalog.yml") diff --git a/open_data/link_operator_to_county_district.py b/open_data/link_operator_to_county_district.py index e55e629c8..f6acc25c9 100644 --- a/open_data/link_operator_to_county_district.py +++ b/open_data/link_operator_to_county_district.py @@ -3,8 +3,7 @@ import pandas as pd from calitp_data_analysis.sql import to_snakecase - -from shared_utils import geography_utils +from calitp_data_analysis import geography_utils from update_vars import analysis_date, COMPILED_CACHED_VIEWS catalog = intake.open_catalog("../_shared_utils/shared_utils/*.yml") diff --git a/open_data/prep_traffic_ops.py b/open_data/prep_traffic_ops.py index 56d515156..d601e40bb 100644 --- a/open_data/prep_traffic_ops.py +++ b/open_data/prep_traffic_ops.py @@ -6,7 +6,8 @@ import intake import pandas as pd -from shared_utils import utils, geography_utils, schedule_rt_utils +from calitp_data_analysis import utils, geography_utils +from shared_utils import schedule_rt_utils from update_vars import TRAFFIC_OPS_GCS, analysis_date catalog = intake.open_catalog( diff --git a/open_data/supplement_meta.py b/open_data/supplement_meta.py index dc58c49de..f3c41c9a6 100644 --- a/open_data/supplement_meta.py +++ b/open_data/supplement_meta.py @@ -7,7 +7,7 @@ from pathlib import Path -from shared_utils import utils +from calitp_data_analysis import utils from update_vars import analysis_date, ESRI_BASE_URL From 888c4738a84a83e2c43750826cd9228d0d557dd4 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 28 Sep 2023 23:52:56 +0000 Subject: [PATCH 09/14] shared_utils to calitp_data_analysis for py_crow_flies/ --- py_crow_flies/decay.py | 2 +- py_crow_flies/five_days_of_the_crow.ipynb | 5 ++--- py_crow_flies/prep.py | 2 +- py_crow_flies/py_crow_flies.ipynb | 11 +++++------ 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/py_crow_flies/decay.py b/py_crow_flies/decay.py index 55b2003c1..37870ea0d 100644 --- a/py_crow_flies/decay.py +++ b/py_crow_flies/decay.py @@ -15,7 +15,7 @@ from dask import delayed, compute from loguru import logger -from shared_utils import utils +from calitp_data_analysis import utils from prep import GCS_FILE_PATH fs = gcsfs.GCSFileSystem() diff --git a/py_crow_flies/five_days_of_the_crow.ipynb b/py_crow_flies/five_days_of_the_crow.ipynb index 36a23119e..dfa48e243 100644 --- a/py_crow_flies/five_days_of_the_crow.ipynb +++ b/py_crow_flies/five_days_of_the_crow.ipynb @@ -31,8 +31,7 @@ "from siuba import *\n", "\n", "from tqdm.notebook import tqdm\n", - "\n", - "import shared_utils" + "from calitp_data_analysis import geography_utils" ] }, { @@ -268,7 +267,7 @@ "outputs": [], "source": [ "# Transform the grid points to your preferred CRS\n", - "central = central.to_crs(shared_utils.geography_utils.CA_NAD83Albers).set_index('pointid')\n", + "central = central.to_crs(geography_utils.CA_NAD83Albers).set_index('pointid')\n", "central = central >> select(-_.Point_ID)" ] }, diff --git a/py_crow_flies/prep.py b/py_crow_flies/prep.py index 613e8808e..d30e2fe9f 100644 --- a/py_crow_flies/prep.py +++ b/py_crow_flies/prep.py @@ -12,7 +12,7 @@ import sys from loguru import logger -from shared_utils import utils +from calitp_data_analysis import utils GCS_FILE_PATH = "gs://calitp-publish-data-analysis/py_crow_flies/" CRS = "EPSG:3857" diff --git a/py_crow_flies/py_crow_flies.ipynb b/py_crow_flies/py_crow_flies.ipynb index 7cd82d0d5..51416fca3 100644 --- a/py_crow_flies/py_crow_flies.ipynb +++ b/py_crow_flies/py_crow_flies.ipynb @@ -21,8 +21,7 @@ "from siuba import *\n", "\n", "from tqdm.notebook import tqdm\n", - "\n", - "import shared_utils" + "from calitp_data_analysis import utils" ] }, { @@ -78,7 +77,7 @@ "metadata": {}, "outputs": [], "source": [ - "shared_utils.utils.geoparquet_gcs_export(central, crow_folder, 'CentralCal_POIs')" + "utils.geoparquet_gcs_export(central, crow_folder, 'CentralCal_POIs')" ] }, { @@ -98,7 +97,7 @@ "metadata": {}, "outputs": [], "source": [ - "shared_utils.utils.geoparquet_gcs_export(nor, crow_folder, 'NorCal_POIs')" + "utils.geoparquet_gcs_export(nor, crow_folder, 'NorCal_POIs')" ] }, { @@ -118,7 +117,7 @@ "metadata": {}, "outputs": [], "source": [ - "shared_utils.utils.geoparquet_gcs_export(so, crow_folder, 'SoCal_POIs')" + "utils.geoparquet_gcs_export(so, crow_folder, 'SoCal_POIs')" ] }, { @@ -138,7 +137,7 @@ "metadata": {}, "outputs": [], "source": [ - "shared_utils.utils.geoparquet_gcs_export(mo, crow_folder, 'Mojave_POIs')" + "utils.geoparquet_gcs_export(mo, crow_folder, 'Mojave_POIs')" ] }, { From 3da3ae337178e53a450ef51e3182743fca7faf46 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 29 Sep 2023 00:00:54 +0000 Subject: [PATCH 10/14] shared_utils to calitp_data_analysis for quarterly_performance_objective/ --- .../A1_scheduled_route_level_df.py | 5 +++-- .../A2_generate_routes_on_shn_data.py | 2 +- quarterly_performance_objective/A3_categorize_routes.py | 2 +- .../B2_route_service_hours_delay.py | 5 +++-- quarterly_performance_objective/C1_report_metrics.py | 4 ++-- quarterly_performance_objective/C2_report_charts.py | 4 ++-- .../check-route-categories.ipynb | 2 +- .../current_quarter_report.ipynb | 8 ++++---- .../download_trips_v2_backfill.py | 3 ++- quarterly_performance_objective/historical_report.ipynb | 8 ++++---- 10 files changed, 23 insertions(+), 20 deletions(-) diff --git a/quarterly_performance_objective/A1_scheduled_route_level_df.py b/quarterly_performance_objective/A1_scheduled_route_level_df.py index c7ac64133..cdf3fb8e8 100644 --- a/quarterly_performance_objective/A1_scheduled_route_level_df.py +++ b/quarterly_performance_objective/A1_scheduled_route_level_df.py @@ -13,7 +13,8 @@ from loguru import logger from typing import Literal -from shared_utils import geography_utils, utils +from calitp_data_analysis import geography_utils, utils +from shared_utils import portfolio_utils from bus_service_utils import create_parallel_corridors from segment_speed_utils import helpers, gtfs_schedule_wrangling from update_vars import (BUS_SERVICE_GCS, COMPILED_CACHED_GCS, @@ -72,7 +73,7 @@ def aggregate_trip_service_to_route_level( get_pandas = True ) - route_service_hours = geography_utils.aggregate_by_geography( + route_service_hours = portfolio_utils.aggregate_by_geography( trips, group_cols = route_cols, sum_cols = ["service_hours"] diff --git a/quarterly_performance_objective/A2_generate_routes_on_shn_data.py b/quarterly_performance_objective/A2_generate_routes_on_shn_data.py index 90a1df648..a5b8d0d80 100644 --- a/quarterly_performance_objective/A2_generate_routes_on_shn_data.py +++ b/quarterly_performance_objective/A2_generate_routes_on_shn_data.py @@ -9,7 +9,7 @@ from loguru import logger from bus_service_utils import create_parallel_corridors -from shared_utils import geography_utils +from calitp_data_analysis import geography_utils from update_vars import (BUS_SERVICE_GCS, ANALYSIS_DATE, VERSION) diff --git a/quarterly_performance_objective/A3_categorize_routes.py b/quarterly_performance_objective/A3_categorize_routes.py index c29665512..deb883e3d 100644 --- a/quarterly_performance_objective/A3_categorize_routes.py +++ b/quarterly_performance_objective/A3_categorize_routes.py @@ -9,7 +9,7 @@ from loguru import logger -from shared_utils import geography_utils, utils +from calitp_data_analysis import geography_utils, utils from update_vars import (BUS_SERVICE_GCS, ANALYSIS_DATE, VERSION) diff --git a/quarterly_performance_objective/B2_route_service_hours_delay.py b/quarterly_performance_objective/B2_route_service_hours_delay.py index d043ad271..99a60896b 100644 --- a/quarterly_performance_objective/B2_route_service_hours_delay.py +++ b/quarterly_performance_objective/B2_route_service_hours_delay.py @@ -12,7 +12,8 @@ from update_vars import (BUS_SERVICE_GCS, COMPILED_CACHED_GCS, ANALYSIS_DATE, VERSION ) -from shared_utils import geography_utils, utils +from shared_utils import portfolio_utils +from calitp_data_analysis import utils def calculate_route_level_delays(selected_date: str, route_cols: list) -> pd.DataFrame: @@ -28,7 +29,7 @@ def calculate_route_level_delays(selected_date: str, delay_df2 = delay_df.drop_duplicates( subset=route_cols + ["trip_id"]) - route_delay = geography_utils.aggregate_by_geography( + route_delay = portfolio_utils.aggregate_by_geography( delay_df2, group_cols = route_cols, sum_cols = ["delay_seconds"] diff --git a/quarterly_performance_objective/C1_report_metrics.py b/quarterly_performance_objective/C1_report_metrics.py index 676e7523e..ca02407ba 100644 --- a/quarterly_performance_objective/C1_report_metrics.py +++ b/quarterly_performance_objective/C1_report_metrics.py @@ -11,7 +11,7 @@ from typing import Literal -from shared_utils import geography_utils +from shared_utils import portfolio_utils catalog = intake.open_catalog("*.yml") @@ -22,7 +22,7 @@ def aggregate_calculate_percent_and_average( """ Create columns with pct values. """ - agg_df = geography_utils.aggregate_by_geography( + agg_df = portfolio_utils.aggregate_by_geography( df, group_cols = group_cols, sum_cols = sum_cols, diff --git a/quarterly_performance_objective/C2_report_charts.py b/quarterly_performance_objective/C2_report_charts.py index 09f7eaa8d..0e35dfaf6 100644 --- a/quarterly_performance_objective/C2_report_charts.py +++ b/quarterly_performance_objective/C2_report_charts.py @@ -3,8 +3,8 @@ from typing import List, Literal -from shared_utils import styleguide -from shared_utils import calitp_color_palette as cp +from calitp_data_analysis import styleguide +from calitp_data_analysis import calitp_color_palette as cp def base_bar(df: pd.DataFrame, x_col: str) -> alt.Chart: chart = (alt.Chart(df) diff --git a/quarterly_performance_objective/check-route-categories.ipynb b/quarterly_performance_objective/check-route-categories.ipynb index 599ed27bc..cd8d1f84a 100644 --- a/quarterly_performance_objective/check-route-categories.ipynb +++ b/quarterly_performance_objective/check-route-categories.ipynb @@ -337,7 +337,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/quarterly_performance_objective/current_quarter_report.ipynb b/quarterly_performance_objective/current_quarter_report.ipynb index 348448ec0..b6aa9e53f 100644 --- a/quarterly_performance_objective/current_quarter_report.ipynb +++ b/quarterly_performance_objective/current_quarter_report.ipynb @@ -46,9 +46,9 @@ "import C1_report_metrics as report_metrics\n", "import C2_report_charts as report_charts\n", "from update_vars import BUS_SERVICE_GCS, CURRENT_QUARTER, ANALYSIS_DATE\n", - "from shared_utils import geography_utils, portfolio_utils, rt_dates\n", - "from shared_utils import calitp_color_palette as cp\n", - "from shared_utils import styleguide\n", + "from shared_utils import portfolio_utils, rt_dates\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "from calitp_data_analysis import styleguide\n", "\n", "hq_catalog = intake.open_catalog(\"../high_quality_transit_areas/*.yml\")" ] @@ -114,7 +114,7 @@ "metadata": {}, "outputs": [], "source": [ - "all_hours = geography_utils.aggregate_by_geography(\n", + "all_hours = portfolio_utils.aggregate_by_geography(\n", " summary.assign(category=\"All\"),\n", " group_cols = [\"category\"],\n", " sum_cols = [\"unique_route\", \"service_hours\"]\n", diff --git a/quarterly_performance_objective/download_trips_v2_backfill.py b/quarterly_performance_objective/download_trips_v2_backfill.py index 85b62d3aa..098dea1cd 100644 --- a/quarterly_performance_objective/download_trips_v2_backfill.py +++ b/quarterly_performance_objective/download_trips_v2_backfill.py @@ -15,7 +15,8 @@ from loguru import logger from shared_utils import (gtfs_utils_v2, gtfs_utils, - rt_dates, utils, geography_utils) + rt_dates) +from calitp_data_analysis import utils, geography_utils from update_vars import COMPILED_CACHED_GCS def scheduled_operators(analysis_date: str): diff --git a/quarterly_performance_objective/historical_report.ipynb b/quarterly_performance_objective/historical_report.ipynb index 5c01bc45c..fdb097d39 100644 --- a/quarterly_performance_objective/historical_report.ipynb +++ b/quarterly_performance_objective/historical_report.ipynb @@ -38,9 +38,9 @@ "import pandas as pd\n", "\n", "import C1_report_metrics as report_metrics\n", - "from shared_utils import rt_dates, geography_utils\n", - "from shared_utils import calitp_color_palette as cp\n", - "from shared_utils import styleguide\n", + "from shared_utils import rt_dates, portfolio_utils\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "from calitp_data_analysis import styleguide\n", "from bus_service_utils import chart_utils" ] }, @@ -72,7 +72,7 @@ "\n", " group_cols = [\"year_quarter\", \"service_date\", \"year\", \"quarter\"]\n", "\n", - " all_routes = geography_utils.aggregate_by_geography(\n", + " all_routes = portfolio_utils.aggregate_by_geography(\n", " df[df.variable.isin(var_list)],\n", " group_cols + [\"variable\"],\n", " sum_cols = [\"value\"]\n", From 22bd512c7dd428a19b314325539b1250b35f952d Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 29 Sep 2023 00:20:34 +0000 Subject: [PATCH 11/14] shared_utils to calitp_data_analysis for rt_segment_speeds/ --- rt_segment_speeds/00_diagnose_segments.ipynb | 14 +++++++------- .../04_threshold_exploratory_v1.ipynb | 6 +++--- rt_segment_speeds/10_long_trips.ipynb | 2 +- rt_segment_speeds/11_tiger.ipynb | 3 ++- rt_segment_speeds/13_organization_name.ipynb | 1 - rt_segment_speeds/14_eric_explore_compare.ipynb | 14 +++++++------- rt_segment_speeds/19_meters_threshold.ipynb | 1 - rt_segment_speeds/_rt_scheduled_utils.py | 4 +--- rt_segment_speeds/_threshold_utils.py | 3 +-- rt_segment_speeds/scripts/A1_sjoin_vp_segments.py | 2 +- .../scripts/A2_sjoin_postprocessing.py | 2 +- .../scripts/B1_speeds_by_segment_trip.py | 2 +- .../scripts/B2_avg_speeds_by_segment.py | 2 +- rt_segment_speeds/scripts/B3_export.py | 2 +- rt_segment_speeds/scripts/C3_trip_route_speed.py | 5 +++-- .../scripts/concatenate_stop_segments.py | 5 +++-- .../scripts/cut_normal_stop_segments.py | 2 +- rt_segment_speeds/scripts/cut_road_segments.py | 4 ++-- rt_segment_speeds/scripts/cut_route_segments.py | 2 +- .../scripts/cut_special_stop_segments.py | 2 +- rt_segment_speeds/scripts/download_all_roads.py | 4 ++-- rt_segment_speeds/scripts/ingest_ca_roads.py | 2 +- rt_segment_speeds/scripts/prep_stop_segments.py | 2 +- rt_segment_speeds/segment_speed_utils/helpers.py | 6 ++++-- 24 files changed, 46 insertions(+), 46 deletions(-) diff --git a/rt_segment_speeds/00_diagnose_segments.ipynb b/rt_segment_speeds/00_diagnose_segments.ipynb index 1b62639f6..ef80e0ee9 100644 --- a/rt_segment_speeds/00_diagnose_segments.ipynb +++ b/rt_segment_speeds/00_diagnose_segments.ipynb @@ -23,7 +23,7 @@ "import geopandas as gpd\n", "from siuba import *\n", "\n", - "import shared_utils\n", + "from shared_utils import rt_dates\n", "\n", "import gcsfs\n", "fs = gcsfs.GCSFileSystem()" @@ -36,7 +36,7 @@ "metadata": {}, "outputs": [], "source": [ - "analysis_date = shared_utils.rt_dates.DATES['jan2023']" + "analysis_date = rt_dates.DATES['jan2023']" ] }, { @@ -792,7 +792,7 @@ "import geopandas as gpd\n", "from siuba import *\n", "\n", - "import shared_utils\n", + "from shared_utils import gtfs_utils_v2\n", "\n", "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(1_000_000_000_000) ## 1TB?" @@ -818,7 +818,7 @@ } ], "source": [ - "shared_utils.gtfs_utils_v2?" + "gtfs_utils_v2?" ] }, { @@ -828,7 +828,7 @@ "metadata": {}, "outputs": [], "source": [ - "from calitp.tables import tbls" + "from calitp_data_analysis.tables import tbls" ] }, { @@ -1812,7 +1812,7 @@ } ], "source": [ - "kings = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_organization(selected_date = '2023-02-07') >> filter(_.name.str.contains(\"Kings\"))\n", + "kings = gtfs_utils_v2.schedule_daily_feed_to_organization(selected_date = '2023-02-07') >> filter(_.name.str.contains(\"Kings\"))\n", "kings" ] }, @@ -1872,7 +1872,7 @@ } ], "source": [ - "shared_utils.gtfs_utils_v2.get_trips(selected_date= '2023-02-07', operator_feeds=kings.feed_key.to_list()) " + "gtfs_utils_v2.get_trips(selected_date= '2023-02-07', operator_feeds=kings.feed_key.to_list()) " ] } ], diff --git a/rt_segment_speeds/04_threshold_exploratory_v1.ipynb b/rt_segment_speeds/04_threshold_exploratory_v1.ipynb index 1e1194ac9..3b73b08e9 100644 --- a/rt_segment_speeds/04_threshold_exploratory_v1.ipynb +++ b/rt_segment_speeds/04_threshold_exploratory_v1.ipynb @@ -32,7 +32,7 @@ "import geopandas as gpd\n", "import numpy as np\n", "import pandas as pd\n", - "from calitp.sql import to_snakecase" + "from calitp_data_analysis.sql import to_snakecase" ] }, { @@ -44,8 +44,8 @@ }, "outputs": [], "source": [ - "from shared_utils import calitp_color_palette as cp\n", - "from shared_utils import geography_utils, styleguide, utils" + "from calitp_data_analysis import calitp_color_palette as cp\n", + "from calitp_data_analysis import geography_utils, styleguide, utils" ] }, { diff --git a/rt_segment_speeds/10_long_trips.ipynb b/rt_segment_speeds/10_long_trips.ipynb index 50b735bd9..3b84bc78f 100644 --- a/rt_segment_speeds/10_long_trips.ipynb +++ b/rt_segment_speeds/10_long_trips.ipynb @@ -29,7 +29,7 @@ "import altair as alt\n", "\n", "import pandas as pd\n", - "from shared_utils import calitp_color_palette as cp" + "from calitp_data_analysis import calitp_color_palette as cp" ] }, { diff --git a/rt_segment_speeds/11_tiger.ipynb b/rt_segment_speeds/11_tiger.ipynb index 58d91bca3..dbc67ab87 100644 --- a/rt_segment_speeds/11_tiger.ipynb +++ b/rt_segment_speeds/11_tiger.ipynb @@ -48,7 +48,8 @@ "from dask import compute, delayed\n", "from segment_speed_utils import helpers\n", "from segment_speed_utils.project_vars import analysis_date\n", - "from shared_utils import dask_utils, geography_utils, utils\n", + "from shared_utils import dask_utils\n", + "from calitp_data_analysis import geography_utils, utils\n", "\n", "GCS_FILE_PATH = \"gs://calitp-analytics-data/data-analyses/\"\n", "SHARED_GCS = f\"{GCS_FILE_PATH}shared_data/\"" diff --git a/rt_segment_speeds/13_organization_name.ipynb b/rt_segment_speeds/13_organization_name.ipynb index 6eab8f58e..c067201c6 100644 --- a/rt_segment_speeds/13_organization_name.ipynb +++ b/rt_segment_speeds/13_organization_name.ipynb @@ -34,7 +34,6 @@ "from calitp_data_analysis.tables import tbls\n", "from siuba import *\n", "\n", - "from shared_utils import gtfs_utils_v2, portfolio_utils, utils\n", "from segment_speed_utils import helpers\n", "from segment_speed_utils.project_vars import SEGMENT_GCS, analysis_date" ] diff --git a/rt_segment_speeds/14_eric_explore_compare.ipynb b/rt_segment_speeds/14_eric_explore_compare.ipynb index 1492c0756..a82cb5c62 100644 --- a/rt_segment_speeds/14_eric_explore_compare.ipynb +++ b/rt_segment_speeds/14_eric_explore_compare.ipynb @@ -10,11 +10,11 @@ "import os\n", "os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(800_000_000_000)\n", "os.environ['USE_PYGEOS'] = '0'\n", - "import shared_utils\n", "\n", "from calitp_data_analysis.tables import tbls\n", "import calitp_data_analysis.magics\n", - "from calitp_data.storage import get_fs\n", + "from calitp_data_infra.storage import get_fs\n", + "from shared_utils import rt_utils\n", "\n", "from siuba import *\n", "import pandas as pd\n", @@ -364,7 +364,7 @@ } ], "source": [ - "shared_utils.rt_utils.get_speedmaps_ix_df?" + "rt_utils.get_speedmaps_ix_df?" ] }, { @@ -494,7 +494,7 @@ } ], "source": [ - "shared_utils.rt_utils.get_speedmaps_ix_df(analysis_date) >> filter(_.organization_name.str.contains('Santa')) >> head(5)" + "rt_utils.get_speedmaps_ix_df(analysis_date) >> filter(_.organization_name.str.contains('Santa')) >> head(5)" ] }, { @@ -662,7 +662,7 @@ "metadata": {}, "outputs": [], "source": [ - "from shared_utils.rt_utils import try_parallel, arrowize_by_frequency, ZERO_THIRTY_COLORSCALE, SPEEDMAP_LEGEND_URL" + "from rt_utils import try_parallel, arrowize_by_frequency, ZERO_THIRTY_COLORSCALE, SPEEDMAP_LEGEND_URL" ] }, { @@ -881,7 +881,7 @@ } ], "source": [ - "shared_utils.rt_utils.set_state_export(segments_samo)" + "rt_utils.set_state_export(segments_samo)" ] }, { @@ -929,7 +929,7 @@ } ], "source": [ - "shared_utils.rt_utils.set_state_export(segments_samo, cmap=ZERO_THIRTY_COLORSCALE, legend_url=SPEEDMAP_LEGEND_URL,\n", + "rt_utils.set_state_export(segments_samo, cmap=ZERO_THIRTY_COLORSCALE, legend_url=SPEEDMAP_LEGEND_URL,\n", " map_type='speedmap', color_col='p20_mph', filename='samo_segments_test')" ] }, diff --git a/rt_segment_speeds/19_meters_threshold.ipynb b/rt_segment_speeds/19_meters_threshold.ipynb index edc554b3f..cd9e3a905 100644 --- a/rt_segment_speeds/19_meters_threshold.ipynb +++ b/rt_segment_speeds/19_meters_threshold.ipynb @@ -19,7 +19,6 @@ " analysis_date,\n", ")\n", "from scripts import (A1_sjoin_vp_segments, A2_valid_vehicle_positions,B2_avg_speeds_by_segment)\n", - "from shared_utils import geography_utils\n", "import _threshold_utils as threshold_utils\n", "import _rt_scheduled_utils as rt_scheduled_utils\n", "CONFIG_PATH = './scripts/config.yml'\n", diff --git a/rt_segment_speeds/_rt_scheduled_utils.py b/rt_segment_speeds/_rt_scheduled_utils.py index 86fe92030..c190e3e63 100644 --- a/rt_segment_speeds/_rt_scheduled_utils.py +++ b/rt_segment_speeds/_rt_scheduled_utils.py @@ -1,7 +1,5 @@ -import numpy as np import pandas as pd import dask.dataframe as dd -import datetime import _threshold_utils as threshold_utils from segment_speed_utils import helpers, sched_rt_utils @@ -9,7 +7,7 @@ # Graphs import altair as alt -from shared_utils import calitp_color_palette as cp +from calitp_data_analysis import calitp_color_palette as cp alt.data_transformers.enable('default', max_rows=None) import gcsfs diff --git a/rt_segment_speeds/_threshold_utils.py b/rt_segment_speeds/_threshold_utils.py index 1d926d3a4..3e9de30cc 100644 --- a/rt_segment_speeds/_threshold_utils.py +++ b/rt_segment_speeds/_threshold_utils.py @@ -4,8 +4,7 @@ from calitp_data_analysis.sql import to_snakecase import altair as alt -from shared_utils import calitp_color_palette as cp -from shared_utils import geography_utils, rt_utils, styleguide, utils +from calitp_data_analysis import calitp_color_palette as cp import intake catalog = intake.open_catalog("./catalog_threshold.yml") diff --git a/rt_segment_speeds/scripts/A1_sjoin_vp_segments.py b/rt_segment_speeds/scripts/A1_sjoin_vp_segments.py index 61f178304..135ca3aa9 100644 --- a/rt_segment_speeds/scripts/A1_sjoin_vp_segments.py +++ b/rt_segment_speeds/scripts/A1_sjoin_vp_segments.py @@ -15,7 +15,7 @@ from loguru import logger -from shared_utils.geography_utils import WGS84 +from calitp_data_analysis.geography_utils import WGS84 from segment_speed_utils import helpers from segment_speed_utils.project_vars import (analysis_date, SEGMENT_GCS, CONFIG_PATH, PROJECT_CRS) diff --git a/rt_segment_speeds/scripts/A2_sjoin_postprocessing.py b/rt_segment_speeds/scripts/A2_sjoin_postprocessing.py index 886dc7208..4bef3b131 100644 --- a/rt_segment_speeds/scripts/A2_sjoin_postprocessing.py +++ b/rt_segment_speeds/scripts/A2_sjoin_postprocessing.py @@ -19,7 +19,7 @@ from loguru import logger -from shared_utils.geography_utils import WGS84 +from calitp_data_analysis.geography_utils import WGS84 from segment_speed_utils import helpers, segment_calcs, wrangle_shapes from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, PROJECT_CRS, CONFIG_PATH) diff --git a/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py b/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py index 4078478f9..75b8d7b14 100644 --- a/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py +++ b/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py @@ -15,7 +15,7 @@ from loguru import logger -from shared_utils import geography_utils +from calitp_data_analysis import geography_utils from segment_speed_utils import helpers, segment_calcs, wrangle_shapes from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, PROJECT_CRS, CONFIG_PATH) diff --git a/rt_segment_speeds/scripts/B2_avg_speeds_by_segment.py b/rt_segment_speeds/scripts/B2_avg_speeds_by_segment.py index 657c9bdd8..dc499e5d1 100644 --- a/rt_segment_speeds/scripts/B2_avg_speeds_by_segment.py +++ b/rt_segment_speeds/scripts/B2_avg_speeds_by_segment.py @@ -11,7 +11,7 @@ from segment_speed_utils import helpers, sched_rt_utils from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, CONFIG_PATH) -from shared_utils import utils, geography_utils +from calitp_data_analysis import utils, geography_utils def calculate_avg_speeds( diff --git a/rt_segment_speeds/scripts/B3_export.py b/rt_segment_speeds/scripts/B3_export.py index 17147560c..53333fc6b 100644 --- a/rt_segment_speeds/scripts/B3_export.py +++ b/rt_segment_speeds/scripts/B3_export.py @@ -12,7 +12,7 @@ import pandas as pd from shared_utils import schedule_rt_utils, utils -from shared_utils.geography_utils import WGS84 +from calitp_data_analysis.geography_utils import WGS84 from segment_speed_utils import helpers from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, CONFIG_PATH) diff --git a/rt_segment_speeds/scripts/C3_trip_route_speed.py b/rt_segment_speeds/scripts/C3_trip_route_speed.py index 57e0355c3..46795c36d 100644 --- a/rt_segment_speeds/scripts/C3_trip_route_speed.py +++ b/rt_segment_speeds/scripts/C3_trip_route_speed.py @@ -13,8 +13,9 @@ import pandas as pd from shared_utils.rt_utils import MPH_PER_MPS -from shared_utils.geography_utils import WGS84 -from shared_utils import utils, portfolio_utils, schedule_rt_utils +from calitp_data_analysis.geography_utils import WGS84 +from calitp_data_analysis import utils +from shared_utils import portfolio_utils, schedule_rt_utils from segment_speed_utils import helpers, sched_rt_utils, wrangle_shapes from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, PROJECT_CRS) diff --git a/rt_segment_speeds/scripts/concatenate_stop_segments.py b/rt_segment_speeds/scripts/concatenate_stop_segments.py index c45c3092e..3cd2a883f 100644 --- a/rt_segment_speeds/scripts/concatenate_stop_segments.py +++ b/rt_segment_speeds/scripts/concatenate_stop_segments.py @@ -8,8 +8,9 @@ import geopandas as gpd import pandas as pd -from shared_utils import utils, portfolio_utils -from shared_utils.geography_utils import WGS84 +from calitp_data_analysis import utils +from calitp_data_analysis.geography_utils import WGS84 +from shared_utils import portfolio_utils from segment_speed_utils import helpers, sched_rt_utils, wrangle_shapes from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, CONFIG_PATH) diff --git a/rt_segment_speeds/scripts/cut_normal_stop_segments.py b/rt_segment_speeds/scripts/cut_normal_stop_segments.py index 64687444f..40c4e4544 100644 --- a/rt_segment_speeds/scripts/cut_normal_stop_segments.py +++ b/rt_segment_speeds/scripts/cut_normal_stop_segments.py @@ -15,7 +15,7 @@ from dask import delayed, compute from loguru import logger -from shared_utils import utils +from calitp_data_analysis import utils from segment_speed_utils import array_utils, helpers, wrangle_shapes from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, PROJECT_CRS, CONFIG_PATH) diff --git a/rt_segment_speeds/scripts/cut_road_segments.py b/rt_segment_speeds/scripts/cut_road_segments.py index ce236acc3..eeb2d7b18 100644 --- a/rt_segment_speeds/scripts/cut_road_segments.py +++ b/rt_segment_speeds/scripts/cut_road_segments.py @@ -6,14 +6,14 @@ fs = gcsfs.GCSFileSystem() import dask.dataframe as dd import dask_geopandas as dg -import geopandas import geopandas as gpd import pandas as pd from calitp_data_analysis.sql import to_snakecase from dask import compute, delayed from segment_speed_utils import helpers from segment_speed_utils.project_vars import analysis_date -from shared_utils import dask_utils, geography_utils, utils +from calitp_data_analysis import geography_utils, utils +from shared_utils import dask_utils GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/" SHARED_GCS = f"{GCS_FILE_PATH}shared_data/" diff --git a/rt_segment_speeds/scripts/cut_route_segments.py b/rt_segment_speeds/scripts/cut_route_segments.py index 7b76158d7..506a450c6 100644 --- a/rt_segment_speeds/scripts/cut_route_segments.py +++ b/rt_segment_speeds/scripts/cut_route_segments.py @@ -23,7 +23,7 @@ from loguru import logger -from shared_utils import geography_utils, utils +from calitp_data_analysis import geography_utils, utils from segment_speed_utils import (gtfs_schedule_wrangling, helpers, sched_rt_utils, wrangle_shapes) from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, diff --git a/rt_segment_speeds/scripts/cut_special_stop_segments.py b/rt_segment_speeds/scripts/cut_special_stop_segments.py index e8aff6e11..0dcfb3b97 100644 --- a/rt_segment_speeds/scripts/cut_special_stop_segments.py +++ b/rt_segment_speeds/scripts/cut_special_stop_segments.py @@ -11,7 +11,7 @@ from loguru import logger import cut_normal_stop_segments -from shared_utils import utils +from calitp_data_analysis import utils from segment_speed_utils import (array_utils, helpers, wrangle_shapes) from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, diff --git a/rt_segment_speeds/scripts/download_all_roads.py b/rt_segment_speeds/scripts/download_all_roads.py index e7e465938..e066bea76 100644 --- a/rt_segment_speeds/scripts/download_all_roads.py +++ b/rt_segment_speeds/scripts/download_all_roads.py @@ -2,8 +2,8 @@ #pip install esridump import geopandas as gpd -from calitp_data_analysis import get_fs -from shared_utils import utils +from calitp_data_infra.storage import get_fs +from calitp_data_analysis import utils fs = get_fs() GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/shared_data/" diff --git a/rt_segment_speeds/scripts/ingest_ca_roads.py b/rt_segment_speeds/scripts/ingest_ca_roads.py index dd8517008..7e4d04c11 100644 --- a/rt_segment_speeds/scripts/ingest_ca_roads.py +++ b/rt_segment_speeds/scripts/ingest_ca_roads.py @@ -20,7 +20,7 @@ import fsspec import geopandas as gpd -from shared_utils import utils +from calitp_data_analysis import utils GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/shared_data/" diff --git a/rt_segment_speeds/scripts/prep_stop_segments.py b/rt_segment_speeds/scripts/prep_stop_segments.py index e9c83ffc1..47a57fa27 100644 --- a/rt_segment_speeds/scripts/prep_stop_segments.py +++ b/rt_segment_speeds/scripts/prep_stop_segments.py @@ -31,7 +31,7 @@ from loguru import logger from typing import Union -from shared_utils import utils +from calitp_data_analysis import utils from segment_speed_utils import (helpers, gtfs_schedule_wrangling, wrangle_shapes) from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, diff --git a/rt_segment_speeds/segment_speed_utils/helpers.py b/rt_segment_speeds/segment_speed_utils/helpers.py index aa31d557c..7e100ff12 100644 --- a/rt_segment_speeds/segment_speed_utils/helpers.py +++ b/rt_segment_speeds/segment_speed_utils/helpers.py @@ -14,8 +14,10 @@ import yaml from typing import Literal, Union -from segment_speed_utils.project_vars import SEGMENT_GCS, COMPILED_CACHED_VIEWS, PROJECT_CRS -from shared_utils import utils +from segment_speed_utils.project_vars import (SEGMENT_GCS, + COMPILED_CACHED_VIEWS, + PROJECT_CRS) +from calitp_data_analysis import utils fs = gcsfs.GCSFileSystem() From df0a34e2e251653b13e6ea5549f3f7940c0a1248 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 29 Sep 2023 00:22:54 +0000 Subject: [PATCH 12/14] shared_utils to calitp_data_analysis for rt_predictions/ --- rt_predictions/chart_utils.py | 2 +- rt_predictions/sample_query_materialized_tables.py | 3 +-- rt_predictions/summarize_sampled_updates.py | 5 ++--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/rt_predictions/chart_utils.py b/rt_predictions/chart_utils.py index db6c10beb..ca4a8622a 100644 --- a/rt_predictions/chart_utils.py +++ b/rt_predictions/chart_utils.py @@ -1,6 +1,6 @@ import altair as alt -from shared_utils import calitp_color_palette as cp import pandas as pd +from calitp_data_analysis import calitp_color_palette as cp def altair_dropdown(df, column_for_dropdown:str, title_of_dropdown:str): """ diff --git a/rt_predictions/sample_query_materialized_tables.py b/rt_predictions/sample_query_materialized_tables.py index f96d4815b..4a07f957f 100644 --- a/rt_predictions/sample_query_materialized_tables.py +++ b/rt_predictions/sample_query_materialized_tables.py @@ -10,18 +10,17 @@ os.environ["CALITP_BQ_MAX_BYTES"] = str(12_000_000_000_000) os.environ['USE_PYGEOS'] = '0' +import numpy as np import pandas as pd from siuba import * from calitp_data_analysis.sql import query_sql from calitp_data_analysis.tables import tbls -import shared_utils import datetime as dt from segment_speed_utils.project_vars import (PREDICTIONS_GCS, analysis_date) analysis_date = dt.datetime.fromisoformat(analysis_date) -import numpy as np sampling_periods = {} sampling_periods['am'] = [dt.datetime.combine(analysis_date, dt.time(8, 0)), diff --git a/rt_predictions/summarize_sampled_updates.py b/rt_predictions/summarize_sampled_updates.py index fb45dabed..17cb5c64d 100644 --- a/rt_predictions/summarize_sampled_updates.py +++ b/rt_predictions/summarize_sampled_updates.py @@ -2,12 +2,13 @@ os.environ["CALITP_BQ_MAX_BYTES"] = str(12_000_000_000_000) os.environ['USE_PYGEOS'] = '0' +import numpy as np import pandas as pd +import pytz from siuba import * from calitp_data_analysis.sql import query_sql from calitp_data_analysis.tables import tbls -import shared_utils import datetime as dt import sample_query_materialized_tables as smpl @@ -15,8 +16,6 @@ analysis_date) analysis_date = dt.datetime.fromisoformat(analysis_date) -import pytz -import numpy as np service_levels = smpl.get_service_levels() tu_datasets = smpl.get_tu_datasets() From ca8fe41378930c7cb5a78504484aa406475e1c34 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 29 Sep 2023 00:26:21 +0000 Subject: [PATCH 13/14] shared_utils to calitp_data_analysis for rt_scheduled_v_ran/ --- rt_scheduled_v_ran/utils.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/rt_scheduled_v_ran/utils.py b/rt_scheduled_v_ran/utils.py index d0abc451d..fee533910 100644 --- a/rt_scheduled_v_ran/utils.py +++ b/rt_scheduled_v_ran/utils.py @@ -5,31 +5,23 @@ import os os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) ## 800GB? -from calitp_data_analysis.tables import tbls -from calitp_data_analysis.sql import query_sql -import calitp_data_analysis.magics +import altair as alt import branca -from siuba import * -import pandas as pd - -import datetime as dt -# import time -# from zoneinfo import ZoneInfo - -# import importlib - import gcsfs -fs = gcsfs.GCSFileSystem() +import pandas as pd -# from tqdm import tqdm_notebook -# from tqdm.notebook import trange, tqdm +from siuba import * +from calitp_data_analysis.tables import tbls +from calitp_data_analysis.sql import query_sql +import calitp_data_analysis.magics -import altair as alt -from shared_utils import portfolio_utils, geography_utils, styleguide -from shared_utils import calitp_color_palette as cp +from shared_utils import portfolio_utils +from calitp_data_analysis import geography_utils, styleguide +from calitp_data_analysis import calitp_color_palette as cp from dla_utils import _dla_utils as dla_utils +fs = gcsfs.GCSFileSystem() # Read in complete data table def read_data(): @@ -189,7 +181,7 @@ def get_agg_pct(df, sum_vp: list, ): - agg_df = (geography_utils.aggregate_by_geography(df, + agg_df = (portfolio_utils.aggregate_by_geography(df, group_cols = groupings, sum_cols = [sum_sched, sum_vp] ))>>mutate(avg = _[sum_vp]/_[sum_sched]) From ba9bc383b73dad35816afea2ad9a28badcbbadd4 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 29 Sep 2023 00:29:45 +0000 Subject: [PATCH 14/14] shared_utils to calitp_data_analysis for starter_kit/, don't touch tutorials yet --- starter_kit/gtfs_utils_v2_examples.ipynb | 2 +- starter_kit/shared_utils_examples.ipynb | 12 +++++------- starter_kit/simple_dask.py | 3 +-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/starter_kit/gtfs_utils_v2_examples.ipynb b/starter_kit/gtfs_utils_v2_examples.ipynb index 1ad19ada4..3f697badc 100644 --- a/starter_kit/gtfs_utils_v2_examples.ipynb +++ b/starter_kit/gtfs_utils_v2_examples.ipynb @@ -33,7 +33,7 @@ "\n", "from siuba import *\n", "\n", - "import gtfs_utils_v2\n", + "from shared_utils import gtfs_utils_v2\n", "analysis_date = datetime.date(2023, 1, 17)" ] }, diff --git a/starter_kit/shared_utils_examples.ipynb b/starter_kit/shared_utils_examples.ipynb index bb7e5dab5..9a9bbd95d 100644 --- a/starter_kit/shared_utils_examples.ipynb +++ b/starter_kit/shared_utils_examples.ipynb @@ -52,8 +52,8 @@ "from calitp_data_analysis.tables import tbls\n", "from siuba import *\n", "\n", - "from calitp_data_analysis import geography_utils\n", - "from shared_utils import geography_utils, utils" + "from calitp_data_analysis import geography_utils, utils\n", + "from shared_utils import portfolio_utils" ] }, { @@ -424,7 +424,7 @@ "count_cols = [\"pickup\"]\n", "nunique_cols = [\"pickup_zone\"]\n", "\n", - "by_borough = geography_utils.aggregate_by_geography(\n", + "by_borough = portfolio_utils.aggregate_by_geography(\n", " df[df.pickup_borough.notna()], \n", " group_cols=group_cols,\n", " sum_cols = sum_cols,\n", @@ -749,7 +749,7 @@ } ], "source": [ - "df2 = geography_utils.aggregate_by_geography(\n", + "df2 = portfolio_utils.aggregate_by_geography(\n", " df[(df.payment.notna()) & (df.pickup_borough.notna())], \n", " group_cols = [\"pickup_borough\", \"payment\"],\n", " sum_cols = [\"passengers\", \"fare\"],\n", @@ -981,9 +981,7 @@ "source": [ "import branca\n", "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "from calitp_data_analysis import geography_utils" + "import pandas as pd" ] }, { diff --git a/starter_kit/simple_dask.py b/starter_kit/simple_dask.py index 585b435df..27a31dcbb 100644 --- a/starter_kit/simple_dask.py +++ b/starter_kit/simple_dask.py @@ -1,10 +1,9 @@ # Simple script to test dask clutser import dask.dataframe as dd -import gcsfs import os import pandas as pd -from calitp_data_analysis import get_fs +from calitp_data_infra.storage import get_fs fs = get_fs() RT_GCS = 'gs://calitp-analytics-data/data-analyses/rt_delay/compiled_cached_views/'