Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update references for geography_utils #1322

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion _shared_utils/shared_utils/rt_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
v for k, v in DATES.items() if k.endswith("2023") and not any(substring in k for substring in ["jan", "feb"])
]

y2024_dates = [v for k, v in DATES.items() if k.endswith("2024")]
y2024_dates = [v for k, v in DATES.items() if k.endswith("2024") and k not in ["oct2024g"]]


valid_weeks = ["apr2023", "oct2023", "apr2024", "oct2024"]
Expand Down
8 changes: 5 additions & 3 deletions _shared_utils/shared_utils/rt_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def get_vehicle_positions(ix_df: pd.DataFrame) -> gpd.GeoDataFrame:
vp_all = gpd.read_parquet(f"{VP_FILE_PATH}vp_{date_str}.parquet")
org_vp = vp_all >> filter(_.gtfs_dataset_key.isin(ix_df.vehicle_positions_gtfs_dataset_key))
org_vp = org_vp >> select(-_.location_timestamp, -_.service_date, -_.activity_date)
org_vp = org_vp.to_crs(geography_utils.CA_NAD83Albers)
org_vp = org_vp.to_crs(geography_utils.CA_NAD83Albers_m)
utils.geoparquet_gcs_export(org_vp, GCS_FILE_PATH + V2_SUBFOLDER, filename)

return org_vp
Expand Down Expand Up @@ -459,7 +459,9 @@ def get_stops(ix_df: pd.DataFrame) -> gpd.GeoDataFrame:
org_stops = gpd.read_parquet(path)
else:
feed_key_list = list(ix_df.feed_key.unique())
org_stops = gtfs_utils_v2.get_stops(service_date, feed_key_list, stop_cols, crs=geography_utils.CA_NAD83Albers)
org_stops = gtfs_utils_v2.get_stops(
service_date, feed_key_list, stop_cols, crs=geography_utils.CA_NAD83Albers_m
)
utils.geoparquet_gcs_export(org_stops, GCS_FILE_PATH + V2_SUBFOLDER, filename)

return org_stops
Expand All @@ -478,7 +480,7 @@ def get_shapes(ix_df: pd.DataFrame) -> gpd.GeoDataFrame:
else:
feed_key_list = list(ix_df.feed_key.unique())
org_shapes = gtfs_utils_v2.get_shapes(
service_date, feed_key_list, crs=geography_utils.CA_NAD83Albers, shape_cols=shape_cols
service_date, feed_key_list, crs=geography_utils.CA_NAD83Albers_m, shape_cols=shape_cols
)
# Invalid geometries come through as None in the new df, so drop those rows.
org_shapes = org_shapes.dropna(subset=["geometry"])
Expand Down
8 changes: 4 additions & 4 deletions _shared_utils/shared_utils/shared_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def make_county_centroids():
"""
URL = "https://opendata.arcgis.com/datasets/" "8713ced9b78a4abb97dc130a691a8695_0.geojson"

gdf = gpd.read_file(URL).to_crs(geography_utils.CA_StatePlane)
gdf = gpd.read_file(URL).to_crs(geography_utils.CA_NAD83Albers_ft)
gdf.columns = gdf.columns.str.lower()

gdf = (
Expand Down Expand Up @@ -167,7 +167,7 @@ def segment_highway_lines_by_postmile(gdf: gpd.GeoDataFrame):

# Assign segment geometry and overwrite the postmile geometry column
gdf2 = (
gdf.assign(geometry=gpd.GeoSeries(segment_geom, crs=geography_utils.CA_NAD83Albers))
gdf.assign(geometry=gpd.GeoSeries(segment_geom, crs=geography_utils.CA_NAD83Albers_m))
.drop(columns=drop_cols)
.set_geometry("geometry")
)
Expand Down Expand Up @@ -205,7 +205,7 @@ def create_postmile_segments(
.explode("geometry")
.reset_index(drop=True)
.pipe(round_odometer_values, ["bodometer", "eodometer"], num_decimals=3)
.to_crs(geography_utils.CA_NAD83Albers)
.to_crs(geography_utils.CA_NAD83Albers_m)
)

# Have a list accompany the geometry
Expand All @@ -222,7 +222,7 @@ def create_postmile_segments(
f"{GCS_FILE_PATH}state_highway_network_postmiles.parquet", columns=group_cols + ["odometer", "geometry"]
)
.pipe(round_odometer_values, ["odometer"], num_decimals=3)
.to_crs(geography_utils.CA_NAD83Albers)
.to_crs(geography_utils.CA_NAD83Albers_m)
)
# Round to 3 digits for odometer. When there are more decimal places, it makes our cutoffs iffy
# when we use this condition below: odometer >= bodometer & odometer <= eodometer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def process_transit_routes(
## Clean transit routes
df = df.assign(
route_length = df.to_crs(
geography_utils.CA_StatePlane).geometry.length
).to_crs(geography_utils.CA_StatePlane)
geography_utils.CA_NAD83Albers_ft).geometry.length
).to_crs(geography_utils.CA_NAD83Albers_ft)

# Get it down to route_id and pick longest shape
df2 = (df.sort_values(operator_cols + ["route_id", "route_length"],
Expand Down Expand Up @@ -63,7 +63,7 @@ def prep_highway_directions_for_dissolve(
'''
df = (gpd.read_parquet("gs://calitp-analytics-data/data-analyses/"
"shared_data/state_highway_network.parquet")
.to_crs(geography_utils.CA_StatePlane))
.to_crs(geography_utils.CA_NAD83Albers_ft))

# Get dummies for direction
# Can make data wide instead of long
Expand Down
4 changes: 2 additions & 2 deletions bus_service_increase/create_analysis_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def get_shapes(selected_date: str) -> gpd.GeoDataFrame:
selected_date,
columns = ["shape_array_key", "geometry"],
get_pandas = True,
crs = geography_utils.CA_NAD83Albers
crs = geography_utils.CA_NAD83Albers_m
).pipe(
helpers.remove_shapes_outside_ca
).merge(
Expand All @@ -151,7 +151,7 @@ def get_shapes(selected_date: str) -> gpd.GeoDataFrame:


def dissolve_census_tracts(
crs: str = geography_utils.CA_NAD83Albers
crs: str = geography_utils.CA_NAD83Albers_m
) -> gpd.GeoDataFrame:
census_tracts = (
catalog.calenviroscreen_lehd_by_tract.read()
Expand Down
2 changes: 1 addition & 1 deletion bus_service_increase/highways-existing-transit.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
"plot_df = gdf[\n",
" gdf.route_length >= geography_utils.FEET_PER_MI * 0.5\n",
" ].assign(\n",
" geometry = (gdf.geometry.to_crs(geography_utils.CA_StatePlane)\n",
" geometry = (gdf.geometry.to_crs(geography_utils.CA_NAD83Albers_ft)\n",
" .buffer(300)\n",
" .to_crs(geography_utils.WGS84)\n",
" )\n",
Expand Down
2 changes: 1 addition & 1 deletion la_metro_demo/A2_clean_up_gtfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

# LA Metro data is for Oct 2022, so let's use the date we already downloaded
analysis_date = rt_dates.DATES["oct2022"]
PROJECT_CRS = geography_utils.CA_NAD83Albers
PROJECT_CRS = geography_utils.CA_NAD83Albers_m


def fill_missing_route_short_name(df: pd.DataFrame) -> pd.DataFrame:
Expand Down
2 changes: 1 addition & 1 deletion la_metro_demo/A3_assemble_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import A2_clean_up_gtfs as clean_up_gtfs

PROJECT_CRS = geography_utils.CA_NAD83Albers
PROJECT_CRS = geography_utils.CA_NAD83Albers_m
BUS_SERVICE_GCS = "gs://calitp-analytics-data/data-analyses/bus_service_increase/"


Expand Down
4 changes: 2 additions & 2 deletions open_data/create_stops_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def add_distance_to_state_highway(
orig_crs = stops.crs

shn = catalog.state_highway_network.read()[
["geometry"]].to_crs(geography_utils.CA_NAD83Albers).geometry.iloc[0]
["geometry"]].to_crs(geography_utils.CA_NAD83Albers_m).geometry.iloc[0]

stops = stops.to_crs(geography_utils.CA_NAD83Albers)
stops = stops.to_crs(geography_utils.CA_NAD83Albers_m)

stops = stops.assign(
meters_to_shn = stops.geometry.distance(shn).round(1)
Expand Down
2 changes: 1 addition & 1 deletion py_crow_flies/py_crow_flies.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@
"outputs": [],
"source": [
"# Transform the grid points to your preferred CRS\n",
"central = central.to_crs(shared_utils.geography_utils.CA_NAD83Albers).set_index('pointid')\n",
"central = central.to_crs(shared_utils.geography_utils.CA_NAD83Albers_m).set_index('pointid')\n",
"central = central >> select(-_.Point_ID)"
]
},
Expand Down
6 changes: 3 additions & 3 deletions rt_segment_speeds/11_tiger.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
" f\"{SHARED_GCS}all_roads_2020_state06.parquet\",\n",
" filters=[(\"MTFCC\", \"in\", road_type_wanted)],\n",
" columns=[\"LINEARID\", \"geometry\", \"FULLNAME\"],\n",
" ).to_crs(geography_utils.CA_NAD83Albers)\n",
" ).to_crs(geography_utils.CA_NAD83Albers_m)\n",
"\n",
" # If a road has mutliple rows but the same\n",
" # linear ID, dissolve it so it becomes one row.\n",
Expand Down Expand Up @@ -238,7 +238,7 @@
" .drop_duplicates()\n",
" )\n",
"\n",
" stops = stops.set_crs(geography_utils.CA_NAD83Albers)\n",
" stops = stops.set_crs(geography_utils.CA_NAD83Albers_m)\n",
"\n",
" # Buffer each stop by 50 feet\n",
" stops = stops.assign(buffered_geometry=stops.geometry.buffer(50))\n",
Expand Down Expand Up @@ -287,7 +287,7 @@
" \"\"\"\n",
" gtfs_shapes = helpers.import_scheduled_shapes(date).compute().drop_duplicates()\n",
"\n",
" gtfs_shapes = gtfs_shapes.set_crs(geography_utils.CA_NAD83Albers)\n",
" gtfs_shapes = gtfs_shapes.set_crs(geography_utils.CA_NAD83Albers_m)\n",
"\n",
" trips = (\n",
" helpers.import_scheduled_trips(date, (), [\"name\", \"shape_array_key\"])\n",
Expand Down
6 changes: 3 additions & 3 deletions rt_segment_speeds/segment_speed_utils/parallel_corridors.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ def process_transit_routes(analysis_date: str) -> gpd.GeoDataFrame:

# Get this to same CRS as highways
gdf = gdf.assign(
route_length_feet = gdf.geometry.to_crs(geography_utils.CA_StatePlane).length
).drop(columns = "route_length").to_crs(geography_utils.CA_StatePlane)
route_length_feet = gdf.geometry.to_crs(geography_utils.CA_NAD83Albers_ft).length
).drop(columns = "route_length").to_crs(geography_utils.CA_NAD83Albers_ft)


return gdf
Expand All @@ -65,7 +65,7 @@ def process_highways(
direction_cols = ["NB", "SB", "EB", "WB"]

df = (gpd.read_parquet(SHN_FILE)
.to_crs(geography_utils.CA_StatePlane)
.to_crs(geography_utils.CA_NAD83Albers_ft)
)

# Get dummies for direction
Expand Down
4 changes: 2 additions & 2 deletions thruway_bus_validators/A2_plot_amtrak_thruway.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
"outputs": [],
"source": [
"gdf = gdf.assign(\n",
" route_mi = ((gdf.geometry.to_crs(geography_utils.CA_StatePlane)\n",
" route_mi = ((gdf.geometry.to_crs(geography_utils.CA_NAD83Albers_ft)\n",
" .length).divide(geography_utils.FEET_PER_MI)).round(2)\n",
")"
]
Expand Down Expand Up @@ -470,7 +470,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
"version": "3.9.13"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions thruway_bus_validators/A4_local_routes_same_od.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def keep_long_shape_ids(routelines: dg.GeoDataFrame | gpd.GeoDataFrame,
Filter down routelines file to just routes that are pretty long
with shape_id.
"""
routelines = routelines.to_crs(geography_utils.CA_StatePlane)
routelines = routelines.to_crs(geography_utils.CA_NAD83Albers_ft)

routelines = routelines.assign(
route_mi = routelines.geometry.length.divide(
Expand Down Expand Up @@ -115,7 +115,7 @@ def buffer_around_origin_destination(gdf: gpd.GeoDataFrame,
geom_cols = list(gdf.select_dtypes("geometry").columns)

for c in geom_cols:
gdf[c] = gdf[c].to_crs(geography_utils.CA_StatePlane)
gdf[c] = gdf[c].to_crs(geography_utils.CA_NAD83Albers_ft)

gdf = gdf.assign(
origin_buffer = gdf.origin.buffer(buffer_feet),
Expand Down
Loading