Skip to content

Commit

Permalink
Merge branch 'main' into feature/overture_building
Browse files Browse the repository at this point in the history
  • Loading branch information
chrowe authored Jul 2, 2024
2 parents b0a2332 + 30735bd commit f79bcda
Show file tree
Hide file tree
Showing 10 changed files with 1,294 additions and 148 deletions.
4 changes: 2 additions & 2 deletions city_metrix/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .land_surface_temperature import LandSurfaceTemperature
from .tree_cover import TreeCover
from .high_land_surface_temperature import HighLandSurfaceTemperature
from .smart_cities_lulc import SmartCitiesLULC
from .smart_surface_lulc import SmartSurfaceLULC
from .open_street_map import OpenStreetMap, OpenStreetMapClass
from .urban_land_use import UrbanLandUse
from .natural_areas import NaturalAreas
Expand All @@ -14,6 +14,6 @@
from .built_up_height import BuiltUpHeight
from .average_net_building_height import AverageNetBuildingHeight
from .open_buildings import OpenBuildings
from .tree_canopy_hight import TreeCanopyHeight
from .tree_canopy_height import TreeCanopyHeight
from .alos_dsm import AlosDSM
from .overture_buildings import OvertureBuildings
303 changes: 303 additions & 0 deletions city_metrix/layers/building_classifier/V2-building-class-data.geojson

Large diffs are not rendered by default.

Binary file not shown.
197 changes: 197 additions & 0 deletions city_metrix/layers/building_classifier/building_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import xarray as xr
import ee
import numpy as np
from rasterio.enums import Resampling
from geocube.api.core import make_geocube
import pandas as pd
import geopandas as gpd
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from xrspatial.classify import reclassify
from exactextract import exact_extract
import pickle

from ..layer import Layer, get_utm_zone_epsg
from ..esa_world_cover import EsaWorldCover, EsaWorldCoverClass
from ..urban_land_use import UrbanLandUse
from ..average_net_building_height import AverageNetBuildingHeight
from ..open_street_map import OpenStreetMap, OpenStreetMapClass
from ..open_buildings import OpenBuildings


class BuildingClassifier(Layer):
def __init__(self, geo_file='V2-building-class-data.geojson', **kwargs):
super().__init__(**kwargs)
self.geo_file = geo_file

def get_data_geo(self):
buildings_sample = gpd.read_file(self.geo_file)
buildings_sample.to_crs(epsg=4326, inplace=True)

return buildings_sample

def get_data_esa_reclass(self, bbox, crs):
# ESA reclass and upsample
esa_world_cover = EsaWorldCover(year=2021).get_data(bbox)

reclass_map = {
EsaWorldCoverClass.TREE_COVER.value: 1,
EsaWorldCoverClass.SHRUBLAND.value: 1,
EsaWorldCoverClass.GRASSLAND.value: 1,
EsaWorldCoverClass.CROPLAND.value: 1,
EsaWorldCoverClass.BUILT_UP.value: 2,
EsaWorldCoverClass.BARE_OR_SPARSE_VEGETATION.value: 3,
EsaWorldCoverClass.SNOW_AND_ICE.value: 20,
EsaWorldCoverClass.PERMANENT_WATER_BODIES.value: 20,
EsaWorldCoverClass.HERBACEOUS_WET_LAND.value: 20,
EsaWorldCoverClass.MANGROVES.value: 20,
EsaWorldCoverClass.MOSS_AND_LICHEN.value: 3
}

# Perform the reclassification
reclassified_esa = reclassify(esa_world_cover, bins=list(reclass_map.keys()), new_values=list(reclass_map.values()))

# Convert to int8 and chunk the data for Dask processing
reclassified_esa = reclassified_esa.astype(np.int8).chunk({'x': 512, 'y': 512})

reclassified_esa = reclassified_esa.rio.write_crs(esa_world_cover.rio.crs, inplace=True)

esa_1m = reclassified_esa.rio.reproject(
dst_crs=crs,
resolution=1,
resampling=Resampling.nearest
)

return esa_1m

def get_data_ulu(self, bbox, crs, snap_to):
# Read ULU land cover, filter to city, select lulc band
ulu_lulc = UrbanLandUse(band='lulc').get_data(bbox)
###### ulu_roads = UrbanLandUse(band='road').get_data(bbox)
####### Create road mask of 50
####### Typical threshold for creating road mask
####### road_mask = ulu_roads >= 50
####### ulu_lulc = ulu_lulc.where(~road_mask, 6)
# 0-Unclassified: 0 (open space)
# 1-Non-residential: 0 (open space), 1 (non-res)
# 2-Residential: 2 (Atomistic), 3 (Informal), 4 (Formal), 5 (Housing project)
####### 3-Roads: 6 (Roads)
mapping = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 2}
for from_val, to_val in mapping.items():
ulu_lulc = ulu_lulc.where(ulu_lulc != from_val, to_val)

# Convert to int8 and chunk the data for Dask processing
ulu_lulc = ulu_lulc.astype(np.int8).chunk({'x': 512, 'y': 512})

####### 1-Non-residential as default
# 0-Unclassified as nodata
ulu_lulc_1m = ulu_lulc.rio.reproject(
dst_crs=crs,
shape=snap_to.shape,
resampling=Resampling.nearest,
nodata=0
)

return ulu_lulc_1m

def get_data_anbh(self, bbox, snap_to):
# Load ANBH layer
anbh_data = AverageNetBuildingHeight().get_data(bbox)

# Chunk the data for Dask processing
anbh_data = anbh_data.chunk({'x': 512, 'y': 512})

# Use reproject_match, because reproject would raise OneDimensionalRaster with shape (1,1)
anbh_1m = anbh_data.rio.reproject_match(
match_data_array=snap_to,
resampling=Resampling.nearest,
nodata=0
)

return anbh_1m

def get_data_buildings(self, bbox, crs):
# OSM buildings
building_osm = OpenStreetMap(osm_class=OpenStreetMapClass.BUILDING).get_data(bbox).to_crs(crs).reset_index(drop=True)
# Google-Microsoft Open Buildings Dataset buildings
openbuilds = OpenBuildings(country='USA').get_data(bbox).to_crs(crs).reset_index(drop=True)

# Intersect buildings and keep the open buildings that don't intersect OSM buildings
intersect_buildings = gpd.sjoin(building_osm, openbuilds, how='inner', predicate='intersects')
openbuilds_non_intersect = openbuilds.loc[~openbuilds.index.isin(intersect_buildings.index)]

buildings = pd.concat([building_osm['geometry'], openbuilds_non_intersect['geometry']], ignore_index=True).reset_index()
# Get rid of any 3d geometries that cause a problem
buildings = buildings[~buildings['geometry'].apply(lambda geom: 'Z' in geom.geom_type)]

# Value not start with 0
buildings['Value'] = buildings['index'] + 1

return buildings

def rasterize_polygon(self, gdf, snap_to):
if gdf.empty:
raster = np.full(snap_to.shape, 0, dtype=np.int8)
raster = xr.DataArray(raster, dims=snap_to.dims, coords=snap_to.coords)

return raster.rio.write_crs(snap_to.rio.crs, inplace=True)

raster = make_geocube(
vector_data=gdf,
measurements=["Value"],
like=snap_to,
fill=np.int8(0)
).Value

return raster.rio.reproject_match(snap_to)


def building_classifier_tree(self):
buildings_sample = self.get_data_geo()

# # Building sample 'V2-building-class-data.geojson' has extracted data and saved in geojson,
# # so no need for steps below
# # Step 1: Get raster data
# bbox = buildings_sample.reset_index().total_bounds
# crs = get_utm_zone_epsg(bbox)
# esa_1m = BuildingClassifier().get_data_esa_reclass(bbox, crs)
# ulu_lulc_1m = self.get_data_ulu(bbox, crs, esa_1m)
# anbh_1m = self.get_data_anbh(bbox, esa_1m)
# # Step 2: Extract 3 features for buildings:
# # 2.1 majority of Urban Land Use(ULU) class
# # 2.2 mean Average Net Building Height(ANBH)
# # 2.3 area of the building
# buildings_sample['ULU'] = exact_extract(ulu_lulc_1m, buildings_sample, ["majority"], output='pandas')['majority']
# buildings_sample['ANBH'] = exact_extract(anbh_1m, buildings_sample, ["mean"], output='pandas')['mean']
# buildings_sample['Area_m'] = buildings_sample.geometry.area

# TODO: classifier parameters
clf = DecisionTreeClassifier(max_depth=5)
# encode labels
buildings_sample['Slope_encoded'] = buildings_sample['Slope'].map({'low': np.int8(42), 'high': np.int8(40)})

# Select these rows for the training set
build_train = buildings_sample[buildings_sample['Model']=='training']

clf.fit(build_train[['ULU', 'ANBH', 'Area_m']], build_train['Slope_encoded'])

# save decision tree classifier
with open('building_classifier.pkl','wb') as f:
pickle.dump(clf, f)

# # Check decision tree and accuracy
# # Select the remaining rows for the testing set
# build_test = buildings_sample[buildings_sample['Model']=='testing']
# plt.figure(figsize=(20, 10))
# plot_tree(clf, feature_names=['ULU', 'ANBH', 'Area_m'], class_names=['low','high'], filled=True)
# plt.show()
# # Predict and evaluate
# y_pred = clf.predict(build_train[['ULU', 'ANBH', 'Area_m']])
# accuracy = accuracy_score(build_train['Slope_encoded'], y_pred)
# print(f"Training Accuracy: {accuracy}")
# len(build_train[build_train['Slope']==build_train['pred']])/len(build_train)
# y_pred = clf.predict(build_test[['ULU', 'ANBH', 'Area_m']])
# accuracy = accuracy_score(build_test['Slope_encoded'], y_pred)
# print(f"Test Accuracy: {accuracy}")
# len(build_test[build_test['Slope']==build_test['pred']])/len(build_test)
21 changes: 0 additions & 21 deletions city_metrix/layers/smart_cities_lulc.py

This file was deleted.

130 changes: 130 additions & 0 deletions city_metrix/layers/smart_surface_lulc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import CAP_STYLE, JOIN_STYLE
from shapely.geometry import box
import psutil
from exactextract import exact_extract
import pickle
import warnings
warnings.filterwarnings('ignore',category=UserWarning)

from .layer import Layer, get_utm_zone_epsg, create_fishnet_grid, MAX_TILE_SIZE
from .open_street_map import OpenStreetMap, OpenStreetMapClass
from .building_classifier.building_classifier import BuildingClassifier


class SmartSurfaceLULC(Layer):
def __init__(self, land_cover_class=None, **kwargs):
super().__init__(**kwargs)
self.land_cover_class = land_cover_class

def get_data(self, bbox):
crs = get_utm_zone_epsg(bbox)

# load building roof slope classifier
with open('city_metrix/layers/building_classifier/building_classifier.pkl', 'rb') as f:
clf = pickle.load(f)

# ESA world cover
esa_1m = BuildingClassifier().get_data_esa_reclass(bbox, crs)

# Open space
open_space_osm = OpenStreetMap(osm_class=OpenStreetMapClass.OPEN_SPACE_HEAT).get_data(bbox).to_crs(crs).reset_index()
open_space_osm['Value'] = np.int8(10)


# Water
water_osm = OpenStreetMap(osm_class=OpenStreetMapClass.WATER).get_data(bbox).to_crs(crs).reset_index()
water_osm['Value'] = np.int8(20)


# Roads
roads_osm = OpenStreetMap(osm_class=OpenStreetMapClass.ROAD).get_data(bbox).to_crs(crs).reset_index()
if len(roads_osm) > 0:
roads_osm['lanes'] = pd.to_numeric(roads_osm['lanes'], errors='coerce')
# Get the average number of lanes per highway class
lanes = (roads_osm.drop(columns='geometry')
.groupby('highway')
# Calculate average and round up
.agg(avg_lanes=('lanes', lambda x: np.ceil(np.nanmean(x)) if not np.isnan(x).all() else np.NaN))
)
# Handle NaN values in avg_lanes
lanes['avg_lanes'] = lanes['avg_lanes'].fillna(2)

# Fill lanes with avg lane value when missing
roads_osm = roads_osm.merge(lanes, on='highway', how='left')
roads_osm['lanes'] = roads_osm['lanes'].fillna(roads_osm['avg_lanes'])

# Add value field (30)
roads_osm['Value'] = np.int8(30)

# Buffer roads by lanes * 10 ft (3.048 m)
# https://nacto.org/publication/urban-street-design-guide/street-design-elements/lane-width/#:~:text=wider%20lane%20widths.-,Lane%20widths%20of%2010%20feet%20are%20appropriate%20in%20urban%20areas,be%20used%20in%20each%20direction
# cap is flat to the terminus of the road
# join style is mitred so intersections are squared
roads_osm['geometry'] = roads_osm.apply(lambda row: row['geometry'].buffer(
row['lanes'] * 3.048,
cap_style=CAP_STYLE.flat,
join_style=JOIN_STYLE.mitre),
axis=1
)
else:
# Add value field (30)
roads_osm['Value'] = np.int8(30)


# Building
ulu_lulc_1m = BuildingClassifier().get_data_ulu(bbox, crs, esa_1m)
anbh_1m = BuildingClassifier().get_data_anbh(bbox, esa_1m)
# get building features
buildings = BuildingClassifier().get_data_buildings(bbox, crs)
# extract ULU, ANBH, and Area_m
buildings['ULU'] = exact_extract(ulu_lulc_1m, buildings, ["majority"], output='pandas')['majority']
buildings['ANBH'] = exact_extract(anbh_1m, buildings, ["mean"], output='pandas')['mean']
buildings['Area_m'] = buildings.geometry.area
# classify buildings
unclassed_buildings = buildings[buildings['ULU'] == 0]
classed_buildings = buildings[buildings['ULU'] != 0]

if len(classed_buildings) > 0:
classed_buildings['Value'] = clf.predict(classed_buildings[['ULU', 'ANBH', 'Area_m']])
# Define conditions and choices
case_when_class = [
# "residential" & "high"
(classed_buildings['Value'] == 40) & (classed_buildings['ULU'] == 2),
# "non-residential" & "high"
(classed_buildings['Value'] == 40) & (classed_buildings['ULU'] == 1),
# "residential" & "low"
(classed_buildings['Value'] == 42) & (classed_buildings['ULU'] == 2),
# "non-residential" & "low"
(classed_buildings['Value'] == 42) & (classed_buildings['ULU'] == 1)
]
case_when_value = [40, 41, 42, 43]
classed_buildings['Value'] = np.select(case_when_class, case_when_value, default=44)
unclassed_buildings['Value'] = 44
buildings = pd.concat([classed_buildings, unclassed_buildings])
else:
buildings['Value'] = 44


# Parking
parking_osm = OpenStreetMap(osm_class=OpenStreetMapClass.PARKING).get_data(bbox).to_crs(crs).reset_index()
parking_osm['Value'] = np.int8(50)


# combine features: open space, water, road, building, parking
feature_df = pd.concat([open_space_osm[['geometry','Value']], water_osm[['geometry','Value']], roads_osm[['geometry','Value']], buildings[['geometry','Value']], parking_osm[['geometry','Value']]], axis=0)
feature_1m = BuildingClassifier().rasterize_polygon(feature_df, esa_1m)

# Combine rasters
datasets = [esa_1m, feature_1m]
# not all raster has 'time', concatenate without 'time' dimension
aligned_datasets = [ds.drop_vars('time', errors='ignore') for ds in datasets]
# use chunk 512x512
aligned_datasets = [ds.chunk({'x': 512, 'y': 512}) for ds in aligned_datasets]
lulc = xr.concat(aligned_datasets, dim='Value').max(dim='Value')
lulc = lulc.compute()

return lulc
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@


class TreeCanopyHeight(Layer):

name = "tree_canopy_hight"

name = "tree_canopy_height"
NO_DATA_VALUE = 0

def __init__(self, **kwargs):
Expand All @@ -20,10 +18,6 @@ def get_data(self, bbox):
# aggregate time series into a single image
canopy_ht = canopy_ht.reduce(ee.Reducer.mean()).rename("cover_code")




data = get_image_collection(ee.ImageCollection(canopy_ht), bbox, 1, "tree canopy hight")
data = get_image_collection(ee.ImageCollection(canopy_ht), bbox, 1, "tree canopy height")

return data.cover_code

2 changes: 2 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ dependencies:
- geemap=0.32.0
- pip=23.3.1
- boto3=1.34.124
- scikit-learn=1.5.0
- pip:
- cartoframes==1.2.5
- git+https://github.com/isciences/exactextract
- overturemaps==0.6.0
Loading

0 comments on commit f79bcda

Please sign in to comment.