Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create layer and metric for percentpop within isochrone #87

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions city_metrix/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@
from .nasa_dem import NasaDEM
from .era_5_hottest_day import Era5HottestDay
from .impervious_surface import ImperviousSurface
from .isoline import Isoline
42 changes: 42 additions & 0 deletions city_metrix/layers/isoline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import geopandas as gpd
import shapely
import boto3

from .layer import Layer, get_utm_zone_epsg

BUCKET_NAME = 'wri-cities-indicators'
PREFIX = 'data/isolines/'

class Isoline(Layer):
def __init__(self, params, aws_profilename=None, **kwargs):
super().__init__(**kwargs)
# params is dict with keys cityname, amenityname, travelmode, threshold_type, threshold_value, year
cityname = params['cityname']
amenityname = params['amenityname']
travelmode = params['travelmode']
threshold_type = params['threshold_type']
threshold_value = params['threshold_value']
year = params['year']
# Get list of isoline files from S3 and find the most recent file with correct cityname, amenity, etc
if aws_profilename is None:
session = boto3.Session()
else:
session = boto3.Session(profile_name=aws_profilename)
s3 = session.client('s3')
obj_list = s3.list_objects(Bucket=BUCKET_NAME, Prefix=PREFIX)['Contents']
objnames = [i['Key'].replace(PREFIX, '') for i in obj_list if len(i['Key'].replace(PREFIX, '')) > 0]
matches = [oname for oname in objnames if oname.split('.')[0].split('_')[:-1] == [cityname, amenityname, travelmode, threshold_type, str(threshold_value)]]
if year is not None:
matches = [oname for oname in matches if oname.split('.')[0].split('_')[-1][:4] == str(year)]
if len(matches) == 0:
raise Exception('No isoline file found.')
matches.sort(key=lambda x: int(x.split('.')[0].split('_')[-1]))
objname = matches[-1]

# Get data from S3
url = f'https://{BUCKET_NAME}.s3.us-east-1.amazonaws.com/{PREFIX}{objname}'
gdf = gpd.read_file(url)
self.gdf = gpd.GeoDataFrame({'is_accessible': [1] * len(gdf), 'geometry': gdf.to_crs('EPSG:4326')['geometry']}).to_crs('EPSG:4326').set_crs('EPSG:4326').set_geometry('geometry')

def get_data(self, bbox):
return self.gdf.clip(shapely.box(*bbox))
1 change: 1 addition & 0 deletions city_metrix/layers/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ def write_layer(path, data):
data.to_file(path, driver="GeoJSON")
else:
raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame")
raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame")

def write_dataarray(path, data):
# for rasters, need to write to locally first then copy to cloud storage
Expand Down
1 change: 1 addition & 0 deletions city_metrix/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from .urban_open_space import urban_open_space
from .natural_areas import natural_areas
from .era_5_met_preprocessing import era_5_met_preprocessing
from .percent_population_access import percent_population_access
63 changes: 63 additions & 0 deletions city_metrix/metrics/percent_population_access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from geopandas import GeoDataFrame, GeoSeries
import numpy as np

from city_metrix.layers.isoline import Isoline
from city_metrix.layers.world_pop import WorldPop




def percent_population_access(zones: GeoDataFrame, cityname, amenityname, travelmode, threshold_type, threshold_value, isoline_year=2024, agesex_classes=[], worldpop_year=2020, aws_profilename=None) -> GeoSeries:

def get_accessible_population(access_features_layer, popraster_layer, zones):
if len(access_features_layer.gdf):
result_series = popraster_layer.mask(access_features_layer).groupby(zones).mean() * popraster_layer.mask(access_features_layer).groupby(zones).count()
else:
result_series = pd.Series([0] * len(zones))
return result_series

# cityname example: ARG-Buenos-Aires
# amenityname is OSMclass names, in lowercase
# travelmode is walk, bike, automobile, publictransit (only walk implemented for now)
# threshold_type is distance or time
# threshold_value is integer, in minutes or meters
population_layer = WorldPop(agesex_classes=agesex_classes, year=worldpop_year)
params = {
'cityname': cityname,
'amenityname': amenityname,
'travelmode': travelmode,
'threshold_type': threshold_type,
'threshold_value': threshold_value,
'year': isoline_year
}
accesszone_layer = Isoline(params, aws_profilename=aws_profilename)

result_gdf = GeoDataFrame({'geometry': zones['geometry']}).set_geometry('geometry').set_crs('EPSG:4326')

try:
access_pop = get_accessible_population(accesszone_layer, population_layer, zones)
total_pop = population_layer.groupby(zones).mean() * population_layer.groupby(zones).count()
result = (access_pop / total_pop) * 100
result_gdf['access_fraction'] = result

except:
# Sometimes doing entire zones gdf causes groupby to throw empty-GDF error -- workaraound is to go district-by-district
print('Calculating district-by-district')

access_fraction = []
for idx in zones.index:
try: # Sometimes there's still an empty-gdf error
access_pop = get_accessible_population(accesszone_layer, population_layer, zones.loc[[zones.index[idx]]])[0]
total_pop = (population_layer.groupby(zones.loc[[zones.index[idx]]]).mean() * population_layer.groupby(zones.loc[[zones.index[idx]]]).count())[0]
if total_pop != 0:
access_fraction.append(access_pop / total_pop)
else:
access_fraction.append(np.nan)
except:
print('Empty-GDF error for index {0}'.format(idx))
access_fraction.append(np.nan)
result_gdf['access_fraction'] = access_fraction
result_gdf['access_fraction'].replace([np.inf, -np.inf], np.nan, inplace=True)
result_gdf['access_fraction'] = result_gdf['access_fraction'] * 100

return result_gdf
7 changes: 7 additions & 0 deletions tests/test_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
EsaWorldCoverClass,
HighLandSurfaceTemperature,
ImperviousSurface,
Isoline,
LandsatCollection2,
LandSurfaceTemperature,
NasaDEM,
Expand Down Expand Up @@ -66,6 +67,12 @@ def test_impervious_surface():
data = ImperviousSurface().get_data(BBOX)
assert np.size(data) > 0

def test_isoline():
layer = Isoline('KEN-Nairobi', 'schools', 'walk', 'time', '15')
nairobi_bbox = (36.66446402, -1.44560888, 37.10497899, -1.16058296)
data = layer.get_data(nairobi_bbox)
assert np.size(data) > 0

def test_land_surface_temperature():
data = LandSurfaceTemperature().get_data(BBOX)
assert np.size(data) > 0
Expand Down