Merge branch 'main' into feature/overture_building

wri · Jul 2, 2024 · f79bcda · f79bcda
2 parents b0a2332 + 30735bd
commit f79bcda
Show file tree

Hide file tree

Showing 10 changed files with 1,294 additions and 148 deletions.
diff --git a/city_metrix/layers/__init__.py b/city_metrix/layers/__init__.py
@@ -3,7 +3,7 @@
 from .land_surface_temperature import LandSurfaceTemperature
 from .tree_cover import TreeCover
 from .high_land_surface_temperature import HighLandSurfaceTemperature
-from .smart_cities_lulc import SmartCitiesLULC
+from .smart_surface_lulc import SmartSurfaceLULC
 from .open_street_map import OpenStreetMap, OpenStreetMapClass
 from .urban_land_use import UrbanLandUse
 from .natural_areas import NaturalAreas
@@ -14,6 +14,6 @@
 from .built_up_height import BuiltUpHeight
 from .average_net_building_height import AverageNetBuildingHeight
 from .open_buildings import OpenBuildings
-from .tree_canopy_hight import TreeCanopyHeight
+from .tree_canopy_height import TreeCanopyHeight
 from .alos_dsm import AlosDSM
 from .overture_buildings import OvertureBuildings
diff --git a/city_metrix/layers/building_classifier/V2-building-class-data.geojson b/city_metrix/layers/building_classifier/V2-building-class-data.geojson
diff --git a/city_metrix/layers/building_classifier/building_classifier.pkl b/city_metrix/layers/building_classifier/building_classifier.pkl
diff --git a/city_metrix/layers/building_classifier/building_classifier.py b/city_metrix/layers/building_classifier/building_classifier.py
@@ -0,0 +1,197 @@
+import xarray as xr
+import ee
+import numpy as np
+from rasterio.enums import Resampling
+from geocube.api.core import make_geocube
+import pandas as pd
+import geopandas as gpd
+from sklearn.tree import DecisionTreeClassifier, plot_tree
+from sklearn.metrics import accuracy_score
+import matplotlib.pyplot as plt
+from xrspatial.classify import reclassify
+from exactextract import exact_extract
+import pickle
+
+from ..layer import Layer, get_utm_zone_epsg
+from ..esa_world_cover import EsaWorldCover, EsaWorldCoverClass
+from ..urban_land_use import UrbanLandUse
+from ..average_net_building_height import AverageNetBuildingHeight
+from ..open_street_map import OpenStreetMap, OpenStreetMapClass
+from ..open_buildings import OpenBuildings
+
+
+class BuildingClassifier(Layer):
+    def __init__(self, geo_file='V2-building-class-data.geojson', **kwargs):
+        super().__init__(**kwargs)
+        self.geo_file = geo_file
+
+    def get_data_geo(self):
+        buildings_sample = gpd.read_file(self.geo_file)
+        buildings_sample.to_crs(epsg=4326, inplace=True)
+
+        return buildings_sample
+
+    def get_data_esa_reclass(self, bbox, crs):
+        # ESA reclass and upsample
+        esa_world_cover = EsaWorldCover(year=2021).get_data(bbox)
+
+        reclass_map = {
+            EsaWorldCoverClass.TREE_COVER.value: 1,
+            EsaWorldCoverClass.SHRUBLAND.value: 1,
+            EsaWorldCoverClass.GRASSLAND.value: 1,
+            EsaWorldCoverClass.CROPLAND.value: 1,
+            EsaWorldCoverClass.BUILT_UP.value: 2,
+            EsaWorldCoverClass.BARE_OR_SPARSE_VEGETATION.value: 3,
+            EsaWorldCoverClass.SNOW_AND_ICE.value: 20,
+            EsaWorldCoverClass.PERMANENT_WATER_BODIES.value: 20,
+            EsaWorldCoverClass.HERBACEOUS_WET_LAND.value: 20,
+            EsaWorldCoverClass.MANGROVES.value: 20,
+            EsaWorldCoverClass.MOSS_AND_LICHEN.value: 3
+        }
+
+        # Perform the reclassification
+        reclassified_esa = reclassify(esa_world_cover, bins=list(reclass_map.keys()), new_values=list(reclass_map.values()))
+
+        # Convert to int8 and chunk the data for Dask processing
+        reclassified_esa = reclassified_esa.astype(np.int8).chunk({'x': 512, 'y': 512})
+
+        reclassified_esa = reclassified_esa.rio.write_crs(esa_world_cover.rio.crs, inplace=True)
+
+        esa_1m = reclassified_esa.rio.reproject(
+            dst_crs=crs,
+            resolution=1,
+            resampling=Resampling.nearest
+        )
+
+        return esa_1m
+
+    def get_data_ulu(self, bbox, crs, snap_to):
+        # Read ULU land cover, filter to city, select lulc band
+        ulu_lulc = UrbanLandUse(band='lulc').get_data(bbox)
+        ###### ulu_roads = UrbanLandUse(band='road').get_data(bbox)
+        ####### Create road mask of 50
+        ####### Typical threshold for creating road mask
+        ####### road_mask = ulu_roads >= 50
+        ####### ulu_lulc = ulu_lulc.where(~road_mask, 6)
+        # 0-Unclassified: 0 (open space)
+        # 1-Non-residential: 0 (open space), 1 (non-res)
+        # 2-Residential: 2 (Atomistic), 3 (Informal), 4 (Formal), 5 (Housing project)
+        ####### 3-Roads: 6 (Roads)
+        mapping = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 2}
+        for from_val, to_val in mapping.items():
+            ulu_lulc = ulu_lulc.where(ulu_lulc != from_val, to_val)
+
+        # Convert to int8 and chunk the data for Dask processing
+        ulu_lulc = ulu_lulc.astype(np.int8).chunk({'x': 512, 'y': 512})
+
+        ####### 1-Non-residential as default
+        # 0-Unclassified as nodata 
+        ulu_lulc_1m = ulu_lulc.rio.reproject(
+            dst_crs=crs,
+            shape=snap_to.shape,
+            resampling=Resampling.nearest,
+            nodata=0
+        )
+
+        return ulu_lulc_1m
+
+    def get_data_anbh(self, bbox, snap_to):
+        # Load ANBH layer
+        anbh_data = AverageNetBuildingHeight().get_data(bbox)
+
+        # Chunk the data for Dask processing
+        anbh_data = anbh_data.chunk({'x': 512, 'y': 512})
+
+        # Use reproject_match, because reproject would raise OneDimensionalRaster with shape (1,1)
+        anbh_1m = anbh_data.rio.reproject_match(
+            match_data_array=snap_to,
+            resampling=Resampling.nearest,
+            nodata=0
+        )
+
+        return anbh_1m
+
+    def get_data_buildings(self, bbox, crs):
+        # OSM buildings
+        building_osm = OpenStreetMap(osm_class=OpenStreetMapClass.BUILDING).get_data(bbox).to_crs(crs).reset_index(drop=True)
+        # Google-Microsoft Open Buildings Dataset buildings
+        openbuilds = OpenBuildings(country='USA').get_data(bbox).to_crs(crs).reset_index(drop=True)
+
+        # Intersect buildings and keep the open buildings that don't intersect OSM buildings
+        intersect_buildings = gpd.sjoin(building_osm, openbuilds, how='inner', predicate='intersects')
+        openbuilds_non_intersect = openbuilds.loc[~openbuilds.index.isin(intersect_buildings.index)]
+
+        buildings = pd.concat([building_osm['geometry'], openbuilds_non_intersect['geometry']], ignore_index=True).reset_index()
+        # Get rid of any 3d geometries that cause a problem
+        buildings = buildings[~buildings['geometry'].apply(lambda geom: 'Z' in geom.geom_type)]
+
+        # Value not start with 0
+        buildings['Value'] = buildings['index'] + 1
+
+        return buildings
+
+    def rasterize_polygon(self, gdf, snap_to):
+        if gdf.empty:
+            raster = np.full(snap_to.shape, 0, dtype=np.int8)
+            raster = xr.DataArray(raster, dims=snap_to.dims, coords=snap_to.coords)
+
+            return raster.rio.write_crs(snap_to.rio.crs, inplace=True)
+
+        raster = make_geocube(
+            vector_data=gdf,
+            measurements=["Value"],
+            like=snap_to,
+            fill=np.int8(0)
+        ).Value
+
+        return raster.rio.reproject_match(snap_to)
+
+
+    def building_classifier_tree(self):
+        buildings_sample = self.get_data_geo()
+
+        # # Building sample 'V2-building-class-data.geojson' has extracted data and saved in geojson,
+        # # so no need for steps below
+        # # Step 1: Get raster data
+        # bbox = buildings_sample.reset_index().total_bounds
+        # crs = get_utm_zone_epsg(bbox)
+        # esa_1m = BuildingClassifier().get_data_esa_reclass(bbox, crs)
+        # ulu_lulc_1m = self.get_data_ulu(bbox, crs, esa_1m)
+        # anbh_1m = self.get_data_anbh(bbox, esa_1m)
+        # # Step 2: Extract 3 features for buildings:
+        # # 2.1 majority of Urban Land Use(ULU) class
+        # # 2.2 mean Average Net Building Height(ANBH)
+        # # 2.3 area of the building
+        # buildings_sample['ULU'] = exact_extract(ulu_lulc_1m, buildings_sample, ["majority"], output='pandas')['majority']
+        # buildings_sample['ANBH'] = exact_extract(anbh_1m, buildings_sample, ["mean"], output='pandas')['mean']
+        # buildings_sample['Area_m'] = buildings_sample.geometry.area
+
+        # TODO: classifier parameters
+        clf = DecisionTreeClassifier(max_depth=5)
+        # encode labels
+        buildings_sample['Slope_encoded'] = buildings_sample['Slope'].map({'low': np.int8(42), 'high': np.int8(40)})
+
+        # Select these rows for the training set
+        build_train = buildings_sample[buildings_sample['Model']=='training']
+
+        clf.fit(build_train[['ULU', 'ANBH', 'Area_m']], build_train['Slope_encoded'])
+
+        # save decision tree classifier
+        with open('building_classifier.pkl','wb') as f:
+            pickle.dump(clf, f)
+
+        # # Check decision tree and accuracy
+        # # Select the remaining rows for the testing set
+        # build_test = buildings_sample[buildings_sample['Model']=='testing']
+        # plt.figure(figsize=(20, 10))
+        # plot_tree(clf, feature_names=['ULU', 'ANBH', 'Area_m'], class_names=['low','high'], filled=True)
+        # plt.show()
+        # # Predict and evaluate
+        # y_pred = clf.predict(build_train[['ULU', 'ANBH', 'Area_m']])
+        # accuracy = accuracy_score(build_train['Slope_encoded'], y_pred)
+        # print(f"Training Accuracy: {accuracy}")
+        # len(build_train[build_train['Slope']==build_train['pred']])/len(build_train)
+        # y_pred = clf.predict(build_test[['ULU', 'ANBH', 'Area_m']])
+        # accuracy = accuracy_score(build_test['Slope_encoded'], y_pred)
+        # print(f"Test Accuracy: {accuracy}")
+        # len(build_test[build_test['Slope']==build_test['pred']])/len(build_test)
diff --git a/city_metrix/layers/smart_cities_lulc.py b/city_metrix/layers/smart_cities_lulc.py
diff --git a/city_metrix/layers/smart_surface_lulc.py b/city_metrix/layers/smart_surface_lulc.py
@@ -0,0 +1,130 @@
+import xarray as xr
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+from shapely.geometry import CAP_STYLE, JOIN_STYLE
+from shapely.geometry import box
+import psutil
+from exactextract import exact_extract
+import pickle
+import warnings
+warnings.filterwarnings('ignore',category=UserWarning)
+
+from .layer import Layer, get_utm_zone_epsg, create_fishnet_grid, MAX_TILE_SIZE
+from .open_street_map import OpenStreetMap, OpenStreetMapClass
+from .building_classifier.building_classifier import BuildingClassifier
+
+
+class SmartSurfaceLULC(Layer):
+    def __init__(self, land_cover_class=None, **kwargs):
+        super().__init__(**kwargs)
+        self.land_cover_class = land_cover_class
+
+    def get_data(self, bbox):
+        crs = get_utm_zone_epsg(bbox)
+
+        # load building roof slope classifier
+        with open('city_metrix/layers/building_classifier/building_classifier.pkl', 'rb') as f:
+            clf = pickle.load(f)
+
+        # ESA world cover
+        esa_1m = BuildingClassifier().get_data_esa_reclass(bbox, crs)
+
+        # Open space
+        open_space_osm = OpenStreetMap(osm_class=OpenStreetMapClass.OPEN_SPACE_HEAT).get_data(bbox).to_crs(crs).reset_index()
+        open_space_osm['Value'] = np.int8(10)
+
+
+        # Water
+        water_osm = OpenStreetMap(osm_class=OpenStreetMapClass.WATER).get_data(bbox).to_crs(crs).reset_index()
+        water_osm['Value'] = np.int8(20)
+
+
+        # Roads
+        roads_osm = OpenStreetMap(osm_class=OpenStreetMapClass.ROAD).get_data(bbox).to_crs(crs).reset_index()
+        if len(roads_osm) > 0:
+            roads_osm['lanes'] = pd.to_numeric(roads_osm['lanes'], errors='coerce')
+            # Get the average number of lanes per highway class
+            lanes = (roads_osm.drop(columns='geometry')
+                    .groupby('highway')
+                    # Calculate average and round up
+                    .agg(avg_lanes=('lanes', lambda x: np.ceil(np.nanmean(x)) if not np.isnan(x).all() else np.NaN))
+                    )
+            # Handle NaN values in avg_lanes
+            lanes['avg_lanes'] = lanes['avg_lanes'].fillna(2)
+
+            # Fill lanes with avg lane value when missing
+            roads_osm = roads_osm.merge(lanes, on='highway', how='left')
+            roads_osm['lanes'] = roads_osm['lanes'].fillna(roads_osm['avg_lanes'])
+
+            # Add value field (30)
+            roads_osm['Value'] = np.int8(30)
+
+            # Buffer roads by lanes * 10 ft (3.048 m)
+            # https://nacto.org/publication/urban-street-design-guide/street-design-elements/lane-width/#:~:text=wider%20lane%20widths.-,Lane%20widths%20of%2010%20feet%20are%20appropriate%20in%20urban%20areas,be%20used%20in%20each%20direction
+            # cap is flat to the terminus of the road
+            # join style is mitred so intersections are squared
+            roads_osm['geometry'] = roads_osm.apply(lambda row: row['geometry'].buffer(
+                row['lanes'] * 3.048,
+                cap_style=CAP_STYLE.flat,
+                join_style=JOIN_STYLE.mitre),
+                axis=1
+            )
+        else:
+            # Add value field (30)
+            roads_osm['Value'] = np.int8(30)
+
+
+        # Building
+        ulu_lulc_1m = BuildingClassifier().get_data_ulu(bbox, crs, esa_1m)
+        anbh_1m = BuildingClassifier().get_data_anbh(bbox, esa_1m)
+        # get building features
+        buildings = BuildingClassifier().get_data_buildings(bbox, crs)
+        # extract ULU, ANBH, and Area_m
+        buildings['ULU'] = exact_extract(ulu_lulc_1m, buildings, ["majority"], output='pandas')['majority']
+        buildings['ANBH'] = exact_extract(anbh_1m, buildings, ["mean"], output='pandas')['mean']
+        buildings['Area_m'] = buildings.geometry.area
+        # classify buildings
+        unclassed_buildings = buildings[buildings['ULU'] == 0]
+        classed_buildings = buildings[buildings['ULU'] != 0]
+
+        if len(classed_buildings) > 0:
+            classed_buildings['Value'] = clf.predict(classed_buildings[['ULU', 'ANBH', 'Area_m']])
+            # Define conditions and choices
+            case_when_class = [
+                # "residential" & "high"
+                (classed_buildings['Value'] == 40) & (classed_buildings['ULU'] == 2),
+                # "non-residential" & "high"
+                (classed_buildings['Value'] == 40) & (classed_buildings['ULU'] == 1),
+                # "residential" & "low"
+                (classed_buildings['Value'] == 42) & (classed_buildings['ULU'] == 2),
+                # "non-residential" & "low"
+                (classed_buildings['Value'] == 42) & (classed_buildings['ULU'] == 1)
+            ]
+            case_when_value = [40, 41, 42, 43]
+            classed_buildings['Value'] = np.select(case_when_class, case_when_value, default=44)
+            unclassed_buildings['Value'] = 44
+            buildings = pd.concat([classed_buildings, unclassed_buildings])
+        else:
+            buildings['Value'] = 44
+
+
+        # Parking
+        parking_osm = OpenStreetMap(osm_class=OpenStreetMapClass.PARKING).get_data(bbox).to_crs(crs).reset_index()
+        parking_osm['Value'] = np.int8(50)
+
+
+        # combine features: open space, water, road, building, parking
+        feature_df = pd.concat([open_space_osm[['geometry','Value']], water_osm[['geometry','Value']], roads_osm[['geometry','Value']], buildings[['geometry','Value']], parking_osm[['geometry','Value']]], axis=0)
+        feature_1m = BuildingClassifier().rasterize_polygon(feature_df, esa_1m)
+
+        # Combine rasters
+        datasets = [esa_1m, feature_1m]
+        # not all raster has 'time', concatenate without 'time' dimension
+        aligned_datasets = [ds.drop_vars('time', errors='ignore') for ds in datasets]
+        # use chunk 512x512
+        aligned_datasets = [ds.chunk({'x': 512, 'y': 512}) for ds in aligned_datasets]
+        lulc = xr.concat(aligned_datasets, dim='Value').max(dim='Value')
+        lulc = lulc.compute()
+
+        return lulc
diff --git a/city_metrix/layers/tree_canopy_hight.py → city_metrix/layers/tree_canopy_height.py b/city_metrix/layers/tree_canopy_hight.py → city_metrix/layers/tree_canopy_height.py
@@ -7,9 +7,7 @@
 
 
 class TreeCanopyHeight(Layer):
-
-    name = "tree_canopy_hight"
-
+    name = "tree_canopy_height"
     NO_DATA_VALUE = 0
 
     def __init__(self, **kwargs):
@@ -20,10 +18,6 @@ def get_data(self, bbox):
         # aggregate time series into a single image
         canopy_ht = canopy_ht.reduce(ee.Reducer.mean()).rename("cover_code")
 
-
-
-
-        data = get_image_collection(ee.ImageCollection(canopy_ht), bbox, 1, "tree canopy hight")
+        data = get_image_collection(ee.ImageCollection(canopy_ht), bbox, 1, "tree canopy height")
 
         return data.cover_code
-
diff --git a/environment.yml b/environment.yml
@@ -21,6 +21,8 @@ dependencies:
   - geemap=0.32.0
   - pip=23.3.1
   - boto3=1.34.124
+  - scikit-learn=1.5.0
   - pip:
     - cartoframes==1.2.5
+    - git+https://github.com/isciences/exactextract
     - overturemaps==0.6.0