From ca2596a99e0798a845ca354a38ffbaac56973b2f Mon Sep 17 00:00:00 2001 From: Ted Wong Date: Thu, 24 Oct 2024 14:36:57 -0400 Subject: [PATCH 1/4] initial commit --- city_metrix/layers/__init__.py | 1 + city_metrix/layers/isochrone.py | 13 +++++++ .../percent_population_within_isochrone.py | 38 +++++++++++++++++++ tests/test_layers.py | 7 ++++ 4 files changed, 59 insertions(+) create mode 100644 city_metrix/layers/isochrone.py create mode 100644 city_metrix/metrics/percent_population_within_isochrone.py diff --git a/city_metrix/layers/__init__.py b/city_metrix/layers/__init__.py index e701e52..1c42593 100644 --- a/city_metrix/layers/__init__.py +++ b/city_metrix/layers/__init__.py @@ -21,3 +21,4 @@ from .nasa_dem import NasaDEM from .era_5_hottest_day import Era5HottestDay from .impervious_surface import ImperviousSurface +from .isochrone import Isochrone diff --git a/city_metrix/layers/isochrone.py b/city_metrix/layers/isochrone.py new file mode 100644 index 0000000..9b3d8a3 --- /dev/null +++ b/city_metrix/layers/isochrone.py @@ -0,0 +1,13 @@ +import geopandas as gpd +import shapely + +from .layer import Layer, get_utm_zone_epsg + +class Isochrone(Layer): + def __init__(self, geojson_filepath, **kwargs): + super().__init__(**kwargs) + gdf = gpd.read_file(geojson_filepath) + self.gdf = gpd.GeoDataFrame({'is_accessible': [1] * len(gdf), 'geometry': gdf.to_crs('EPSG:4326')['geometry']}).to_crs('EPSG:4326').set_crs('EPSG:4326').set_geometry('geometry') + + def get_data(self, bbox): + return self.gdf.clip(shapely.box(*bbox)) diff --git a/city_metrix/metrics/percent_population_within_isochrone.py b/city_metrix/metrics/percent_population_within_isochrone.py new file mode 100644 index 0000000..313c490 --- /dev/null +++ b/city_metrix/metrics/percent_population_within_isochrone.py @@ -0,0 +1,38 @@ +from geopandas import GeoDataFrame, GeoSeries +import numpy as np + +from city_metrix.layers.isochrone import Isochrone +from city_metrix.layers.world_pop import WorldPop + + +def get_accessible_population(access_features_layer, popraster_layer, zones): + if len(access_features_layer.gdf): + result_series = popraster_layer.mask(access_features_layer).groupby(zones).sum() + else: + result_series = pd.Series([0] * len(zones)) + return result_series + +def percent_population_within_isochrone(zones: GeoDataFrame, isochrone_filename, agesex_classes=[], worldpop_year=2020) -> GeoSeries: + population_layer = WorldPop(agesex_classes=agesex_classes, worldpop_year=worldpop_year) + accesszone_layer = Isochrone(isochrone_filename) + + try: + access_pop = get_accessible_population(accesszone_layer, population_layer, zones) + total_pop = population_layer.groupby(zones).sum() + result = (access_pop / total_pop) * 100 + + except: + # Sometimes doing entire zones gdf causes groupby to throw empty-GDF error -- workaraound is to go district-by-district + result_gdf = GeoDataFrame({'geometry': zones['geometry']}).set_geometry('geometry').set_crs('EPSG:4326') + access_fraction = [] + for idx in zones.index: + access_pop = get_accessible_population(accesszone_layer, population_layer, zones.loc[[zones.index[idx]]])[0] + total_pop = population_layer.groupby(zones.loc[[zones.index[idx]]]).sum()[0] + if total_pop != 0: + access_fraction.append(access_pop / total_pop) + else: + access_fraction.append(np.nan) + + result = access_fraction.replace([np.inf,], np.nan) * 100 + + return result \ No newline at end of file diff --git a/tests/test_layers.py b/tests/test_layers.py index f8e1fe2..6677252 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -10,6 +10,7 @@ EsaWorldCoverClass, HighLandSurfaceTemperature, ImperviousSurface, + Isochrone, LandsatCollection2, LandSurfaceTemperature, NasaDEM, @@ -66,6 +67,12 @@ def test_impervious_surface(): data = ImperviousSurface().get_data(BBOX) assert np.size(data) > 0 +def test_isochrone(): + layer = Isochrone('https://wri-cities-indicators.s3.us-east-1.amazonaws.com/data/isochrones/nairobi_schools.geojson') + nairobi_bbox = (36.66446402, -1.44560888, 37.10497899, -1.16058296) + data = layer.get_data(nairobi_bbox) + assert np.size(data) > 0 + def test_land_surface_temperature(): data = LandSurfaceTemperature().get_data(BBOX) assert np.size(data) > 0 From 03aa10cef742cd10e33cd42b6fc78d2652f39b61 Mon Sep 17 00:00:00 2001 From: Ted Wong Date: Wed, 20 Nov 2024 17:21:08 -0500 Subject: [PATCH 2/4] change isochrone to isoline, address filename naming convention --- city_metrix/layers/__init__.py | 2 +- city_metrix/layers/isochrone.py | 13 -------- city_metrix/layers/isoline.py | 32 +++++++++++++++++++ city_metrix/metrics/__init__.py | 1 + ...y => percent_population_within_isoline.py} | 13 +++++--- tests/test_layers.py | 6 ++-- 6 files changed, 46 insertions(+), 21 deletions(-) delete mode 100644 city_metrix/layers/isochrone.py create mode 100644 city_metrix/layers/isoline.py rename city_metrix/metrics/{percent_population_within_isochrone.py => percent_population_within_isoline.py} (66%) diff --git a/city_metrix/layers/__init__.py b/city_metrix/layers/__init__.py index 1c42593..694bd1c 100644 --- a/city_metrix/layers/__init__.py +++ b/city_metrix/layers/__init__.py @@ -21,4 +21,4 @@ from .nasa_dem import NasaDEM from .era_5_hottest_day import Era5HottestDay from .impervious_surface import ImperviousSurface -from .isochrone import Isochrone +from .isoline import Isoline diff --git a/city_metrix/layers/isochrone.py b/city_metrix/layers/isochrone.py deleted file mode 100644 index 9b3d8a3..0000000 --- a/city_metrix/layers/isochrone.py +++ /dev/null @@ -1,13 +0,0 @@ -import geopandas as gpd -import shapely - -from .layer import Layer, get_utm_zone_epsg - -class Isochrone(Layer): - def __init__(self, geojson_filepath, **kwargs): - super().__init__(**kwargs) - gdf = gpd.read_file(geojson_filepath) - self.gdf = gpd.GeoDataFrame({'is_accessible': [1] * len(gdf), 'geometry': gdf.to_crs('EPSG:4326')['geometry']}).to_crs('EPSG:4326').set_crs('EPSG:4326').set_geometry('geometry') - - def get_data(self, bbox): - return self.gdf.clip(shapely.box(*bbox)) diff --git a/city_metrix/layers/isoline.py b/city_metrix/layers/isoline.py new file mode 100644 index 0000000..f799b0c --- /dev/null +++ b/city_metrix/layers/isoline.py @@ -0,0 +1,32 @@ +import geopandas as gpd +import shapely +import boto3 + +from .layer import Layer, get_utm_zone_epsg + +BUCKET_NAME = 'wri-cities-indicators' +PREFIX = 'data/isolines/' + +class Isochrone(Layer): + def __init__(self, cityname, amenityname, travelmode, threshold_type, threshold_value, year=None, **kwargs): + super().__init__(**kwargs) + + # Get list of isoline files from S3 and find the most recent file with correct cityname, amenity, etc + s3 = boto3.client('s3') + obj_list = s3.list_objects(Bucket=BUCKET_NAME, Prefix=PREFIX)['Contents'] + objnames = [i['Key'].replace(PREFIX, '') for i in obj_list if len(i['Key'].replace(PREFIX, '')) > 0] + matches = [oname for oname in objnames if oname.split('_')[:-1] == [cityname, amenityname, travelmode, threshold_type, threshold_value]] + if year is not None: + matches = [oname for oname in matches if oname.split('.')[0].split('_')[-1][:4] == str(year)] + if len(matches) == 0: + raise Exception('No isoline file found.') + matches.sort(key=lambda x: int(x.split('.')[0].split('_')[-1])) + objname = matches[-1] + + # Get data from S3 + url = f'https://{BUCKET_NAME}.s3.us-east-1.amazonaws.com/{PREFIX}{objname}' + gdf = gpd.read_file(url) + self.gdf = gpd.GeoDataFrame({'is_accessible': [1] * len(gdf), 'geometry': gdf.to_crs('EPSG:4326')['geometry']}).to_crs('EPSG:4326').set_crs('EPSG:4326').set_geometry('geometry') + + def get_data(self, bbox): + return self.gdf.clip(shapely.box(*bbox)) diff --git a/city_metrix/metrics/__init__.py b/city_metrix/metrics/__init__.py index d95cfa5..f2a01e3 100644 --- a/city_metrix/metrics/__init__.py +++ b/city_metrix/metrics/__init__.py @@ -5,3 +5,4 @@ from .urban_open_space import urban_open_space from .natural_areas import natural_areas from .era_5_met_preprocessing import era_5_met_preprocessing +from .percent_population_within_isoline import percent_population_within_isoline \ No newline at end of file diff --git a/city_metrix/metrics/percent_population_within_isochrone.py b/city_metrix/metrics/percent_population_within_isoline.py similarity index 66% rename from city_metrix/metrics/percent_population_within_isochrone.py rename to city_metrix/metrics/percent_population_within_isoline.py index 313c490..79dcf07 100644 --- a/city_metrix/metrics/percent_population_within_isochrone.py +++ b/city_metrix/metrics/percent_population_within_isoline.py @@ -1,7 +1,7 @@ from geopandas import GeoDataFrame, GeoSeries import numpy as np -from city_metrix.layers.isochrone import Isochrone +from city_metrix.layers.isochrone import Isoline from city_metrix.layers.world_pop import WorldPop @@ -12,9 +12,14 @@ def get_accessible_population(access_features_layer, popraster_layer, zones): result_series = pd.Series([0] * len(zones)) return result_series -def percent_population_within_isochrone(zones: GeoDataFrame, isochrone_filename, agesex_classes=[], worldpop_year=2020) -> GeoSeries: - population_layer = WorldPop(agesex_classes=agesex_classes, worldpop_year=worldpop_year) - accesszone_layer = Isochrone(isochrone_filename) +def percent_population_within_isoline(zones: GeoDataFrame, cityname, amenityname, travelmode, threshold_type, threshold_value, isoline_year=2024, agesex_classes=[], worldpop_year=2020) -> GeoSeries: + # cityname example: ARG-Buenos-Aires + # amenityname is OSMclass names, in lowercase + # travelmode is walk, bike, automobile, publictransit (only walk implemented for now) + # threshold_type is distance or time + # threshold_value is integer, in minutes or meters + population_layer = WorldPop(agesex_classes=agesex_classes, worldpop_year=worldpop_year) + accesszone_layer = Isoline(cityname, amenityname, travelmode, threshold_type, threshold_value, year) try: access_pop = get_accessible_population(accesszone_layer, population_layer, zones) diff --git a/tests/test_layers.py b/tests/test_layers.py index 6677252..8ca615f 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -10,7 +10,7 @@ EsaWorldCoverClass, HighLandSurfaceTemperature, ImperviousSurface, - Isochrone, + Isoline, LandsatCollection2, LandSurfaceTemperature, NasaDEM, @@ -67,8 +67,8 @@ def test_impervious_surface(): data = ImperviousSurface().get_data(BBOX) assert np.size(data) > 0 -def test_isochrone(): - layer = Isochrone('https://wri-cities-indicators.s3.us-east-1.amazonaws.com/data/isochrones/nairobi_schools.geojson') +def test_isoline(): + layer = Isoline('KEN-Nairobi', 'schools', 'walk', 'time', '15') nairobi_bbox = (36.66446402, -1.44560888, 37.10497899, -1.16058296) data = layer.get_data(nairobi_bbox) assert np.size(data) > 0 From 1996fa6f23d13d202018e6e9d20af4532c572437 Mon Sep 17 00:00:00 2001 From: Ted Wong Date: Thu, 21 Nov 2024 21:24:39 -0500 Subject: [PATCH 3/4] interact with AWS --- city_metrix/layers/isoline.py | 44 ++++++++------ city_metrix/layers/layer.py | 1 + city_metrix/metrics/__init__.py | 2 +- .../metrics/percent_population_access.py | 60 +++++++++++++++++++ .../percent_population_within_isoline.py | 43 ------------- 5 files changed, 89 insertions(+), 61 deletions(-) create mode 100644 city_metrix/metrics/percent_population_access.py delete mode 100644 city_metrix/metrics/percent_population_within_isoline.py diff --git a/city_metrix/layers/isoline.py b/city_metrix/layers/isoline.py index f799b0c..523a489 100644 --- a/city_metrix/layers/isoline.py +++ b/city_metrix/layers/isoline.py @@ -7,24 +7,34 @@ BUCKET_NAME = 'wri-cities-indicators' PREFIX = 'data/isolines/' -class Isochrone(Layer): - def __init__(self, cityname, amenityname, travelmode, threshold_type, threshold_value, year=None, **kwargs): +class Isoline(Layer): + def __init__(self, params, aws_profilename=None, **kwargs): super().__init__(**kwargs) - - # Get list of isoline files from S3 and find the most recent file with correct cityname, amenity, etc - s3 = boto3.client('s3') - obj_list = s3.list_objects(Bucket=BUCKET_NAME, Prefix=PREFIX)['Contents'] - objnames = [i['Key'].replace(PREFIX, '') for i in obj_list if len(i['Key'].replace(PREFIX, '')) > 0] - matches = [oname for oname in objnames if oname.split('_')[:-1] == [cityname, amenityname, travelmode, threshold_type, threshold_value]] - if year is not None: - matches = [oname for oname in matches if oname.split('.')[0].split('_')[-1][:4] == str(year)] - if len(matches) == 0: - raise Exception('No isoline file found.') - matches.sort(key=lambda x: int(x.split('.')[0].split('_')[-1])) - objname = matches[-1] - - # Get data from S3 - url = f'https://{BUCKET_NAME}.s3.us-east-1.amazonaws.com/{PREFIX}{objname}' + # params is dict with keys cityname, amenityname, travelmode, threshold_type, threshold_value, year + cityname = params['cityname'] + amenityname = params['amenityname'] + travelmode = params['travelmode'] + threshold_type = params['threshold_type'] + threshold_value = params['threshold_value'] + year = params['year'] + # Get list of isoline files from S3 and find the most recent file with correct cityname, amenity, etc + if aws_profilename is None: + session = boto3.Session() + else: + session = boto3.Session(profile_name=aws_profilename) + s3 = session.client('s3') + obj_list = s3.list_objects(Bucket=BUCKET_NAME, Prefix=PREFIX)['Contents'] + objnames = [i['Key'].replace(PREFIX, '') for i in obj_list if len(i['Key'].replace(PREFIX, '')) > 0] + matches = [oname for oname in objnames if oname.split('.')[0].split('_')[:-1] == [cityname, amenityname, travelmode, threshold_type, str(threshold_value)]] + if year is not None: + matches = [oname for oname in matches if oname.split('.')[0].split('_')[-1][:4] == str(year)] + if len(matches) == 0: + raise Exception('No isoline file found.') + matches.sort(key=lambda x: int(x.split('.')[0].split('_')[-1])) + objname = matches[-1] + + # Get data from S3 + url = f'https://{BUCKET_NAME}.s3.us-east-1.amazonaws.com/{PREFIX}{objname}' gdf = gpd.read_file(url) self.gdf = gpd.GeoDataFrame({'is_accessible': [1] * len(gdf), 'geometry': gdf.to_crs('EPSG:4326')['geometry']}).to_crs('EPSG:4326').set_crs('EPSG:4326').set_geometry('geometry') diff --git a/city_metrix/layers/layer.py b/city_metrix/layers/layer.py index dce9603..7b3c42c 100644 --- a/city_metrix/layers/layer.py +++ b/city_metrix/layers/layer.py @@ -353,6 +353,7 @@ def write_layer(path, data): data.to_file(path, driver="GeoJSON") else: raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame") + raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame") def write_dataarray(path, data): # for rasters, need to write to locally first then copy to cloud storage diff --git a/city_metrix/metrics/__init__.py b/city_metrix/metrics/__init__.py index f2a01e3..3d23fe4 100644 --- a/city_metrix/metrics/__init__.py +++ b/city_metrix/metrics/__init__.py @@ -5,4 +5,4 @@ from .urban_open_space import urban_open_space from .natural_areas import natural_areas from .era_5_met_preprocessing import era_5_met_preprocessing -from .percent_population_within_isoline import percent_population_within_isoline \ No newline at end of file +from .percent_population_access import percent_population_access \ No newline at end of file diff --git a/city_metrix/metrics/percent_population_access.py b/city_metrix/metrics/percent_population_access.py new file mode 100644 index 0000000..53a4872 --- /dev/null +++ b/city_metrix/metrics/percent_population_access.py @@ -0,0 +1,60 @@ +from geopandas import GeoDataFrame, GeoSeries +import numpy as np + +from city_metrix.layers.isoline import Isoline +from city_metrix.layers.world_pop import WorldPop + + + + +def percent_population_access(zones: GeoDataFrame, cityname, amenityname, travelmode, threshold_type, threshold_value, isoline_year=2024, agesex_classes=[], worldpop_year=2020, aws_profilename=None) -> GeoSeries: + + def get_accessible_population(access_features_layer, popraster_layer, zones): + if len(access_features_layer.gdf): + result_series = popraster_layer.mask(access_features_layer).groupby(zones).mean() * popraster_layer.mask(access_features_layer).groupby(zones).count() + else: + result_series = pd.Series([0] * len(zones)) + return result_series + + # cityname example: ARG-Buenos-Aires + # amenityname is OSMclass names, in lowercase + # travelmode is walk, bike, automobile, publictransit (only walk implemented for now) + # threshold_type is distance or time + # threshold_value is integer, in minutes or meters + population_layer = WorldPop(agesex_classes=agesex_classes, year=worldpop_year) + params = { + 'cityname': cityname, + 'amenityname': amenityname, + 'travelmode': travelmode, + 'threshold_type': threshold_type, + 'threshold_value': threshold_value, + 'year': isoline_year + } + accesszone_layer = Isoline(params, aws_profilename=aws_profilename) + + try: + access_pop = get_accessible_population(accesszone_layer, population_layer, zones) + total_pop = population_layer.groupby(zones).mean() * population_layer.groupby(zones).count() + result = (access_pop / total_pop) * 100 + + except: + # Sometimes doing entire zones gdf causes groupby to throw empty-GDF error -- workaraound is to go district-by-district + print('Calculating district-by-district') + result_gdf = GeoDataFrame({'geometry': zones['geometry']}).set_geometry('geometry').set_crs('EPSG:4326') + access_fraction = [] + for idx in zones.index: + try: # Sometimes there's still an empty-gdf error + access_pop = get_accessible_population(accesszone_layer, population_layer, zones.loc[[zones.index[idx]]])[0] + total_pop = (population_layer.groupby(zones.loc[[zones.index[idx]]]).mean() * population_layer.groupby(zones.loc[[zones.index[idx]]]).count())[0] + if total_pop != 0: + access_fraction.append(access_pop / total_pop) + else: + access_fraction.append(np.nan) + except: + print('Empty-GDF error for index {0}'.format(idx)) + access_fraction.append(np.nan) + result_gdf['access_fraction'] = access_fraction + result_gdf['access_fraction'].replace([np.inf, -np.inf], np.nan, inplace=True) + result_gdf['access_fraction'] = result_gdf['access_fraction'] * 100 + + return result_gdf \ No newline at end of file diff --git a/city_metrix/metrics/percent_population_within_isoline.py b/city_metrix/metrics/percent_population_within_isoline.py deleted file mode 100644 index 79dcf07..0000000 --- a/city_metrix/metrics/percent_population_within_isoline.py +++ /dev/null @@ -1,43 +0,0 @@ -from geopandas import GeoDataFrame, GeoSeries -import numpy as np - -from city_metrix.layers.isochrone import Isoline -from city_metrix.layers.world_pop import WorldPop - - -def get_accessible_population(access_features_layer, popraster_layer, zones): - if len(access_features_layer.gdf): - result_series = popraster_layer.mask(access_features_layer).groupby(zones).sum() - else: - result_series = pd.Series([0] * len(zones)) - return result_series - -def percent_population_within_isoline(zones: GeoDataFrame, cityname, amenityname, travelmode, threshold_type, threshold_value, isoline_year=2024, agesex_classes=[], worldpop_year=2020) -> GeoSeries: - # cityname example: ARG-Buenos-Aires - # amenityname is OSMclass names, in lowercase - # travelmode is walk, bike, automobile, publictransit (only walk implemented for now) - # threshold_type is distance or time - # threshold_value is integer, in minutes or meters - population_layer = WorldPop(agesex_classes=agesex_classes, worldpop_year=worldpop_year) - accesszone_layer = Isoline(cityname, amenityname, travelmode, threshold_type, threshold_value, year) - - try: - access_pop = get_accessible_population(accesszone_layer, population_layer, zones) - total_pop = population_layer.groupby(zones).sum() - result = (access_pop / total_pop) * 100 - - except: - # Sometimes doing entire zones gdf causes groupby to throw empty-GDF error -- workaraound is to go district-by-district - result_gdf = GeoDataFrame({'geometry': zones['geometry']}).set_geometry('geometry').set_crs('EPSG:4326') - access_fraction = [] - for idx in zones.index: - access_pop = get_accessible_population(accesszone_layer, population_layer, zones.loc[[zones.index[idx]]])[0] - total_pop = population_layer.groupby(zones.loc[[zones.index[idx]]]).sum()[0] - if total_pop != 0: - access_fraction.append(access_pop / total_pop) - else: - access_fraction.append(np.nan) - - result = access_fraction.replace([np.inf,], np.nan) * 100 - - return result \ No newline at end of file From 697c2bc4a9ebbf9a69a620f9fdbc4c0449a4d8d1 Mon Sep 17 00:00:00 2001 From: Ted Wong Date: Thu, 21 Nov 2024 21:39:44 -0500 Subject: [PATCH 4/4] fix bug when district-by-district not necessary --- city_metrix/metrics/percent_population_access.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/city_metrix/metrics/percent_population_access.py b/city_metrix/metrics/percent_population_access.py index 53a4872..edddd3f 100644 --- a/city_metrix/metrics/percent_population_access.py +++ b/city_metrix/metrics/percent_population_access.py @@ -32,15 +32,18 @@ def get_accessible_population(access_features_layer, popraster_layer, zones): } accesszone_layer = Isoline(params, aws_profilename=aws_profilename) + result_gdf = GeoDataFrame({'geometry': zones['geometry']}).set_geometry('geometry').set_crs('EPSG:4326') + try: access_pop = get_accessible_population(accesszone_layer, population_layer, zones) total_pop = population_layer.groupby(zones).mean() * population_layer.groupby(zones).count() result = (access_pop / total_pop) * 100 + result_gdf['access_fraction'] = result except: # Sometimes doing entire zones gdf causes groupby to throw empty-GDF error -- workaraound is to go district-by-district print('Calculating district-by-district') - result_gdf = GeoDataFrame({'geometry': zones['geometry']}).set_geometry('geometry').set_crs('EPSG:4326') + access_fraction = [] for idx in zones.index: try: # Sometimes there's still an empty-gdf error