diff --git a/.github/requirements.txt b/.github/requirements.txt index 6b06f181..aff3f1d9 100644 --- a/.github/requirements.txt +++ b/.github/requirements.txt @@ -15,6 +15,6 @@ s3fs==2024.5.0 geemap==0.32.0 pip==23.3.1 boto3==1.34.124 -scikit-learn==1.5.0 +scikit-learn==1.5.1 overturemaps==0.6.0 exactextract==0.2.0.dev252 diff --git a/city_metrix/layers/smart_surface_lulc.py b/city_metrix/layers/smart_surface_lulc.py index 2ab2a020..1f394963 100644 --- a/city_metrix/layers/smart_surface_lulc.py +++ b/city_metrix/layers/smart_surface_lulc.py @@ -13,6 +13,7 @@ from .layer import Layer, get_utm_zone_epsg, create_fishnet_grid, MAX_TILE_SIZE from .open_street_map import OpenStreetMap, OpenStreetMapClass +from .overture_buildings import OvertureBuildings from ..models.building_classifier.building_classifier import BuildingClassifier @@ -33,12 +34,12 @@ def get_data(self, bbox): # Open space open_space_osm = OpenStreetMap(osm_class=OpenStreetMapClass.OPEN_SPACE_HEAT).get_data(bbox).to_crs(crs).reset_index() - open_space_osm['Value'] = np.int8(10) + open_space_osm['Value'] = 10 # Water water_osm = OpenStreetMap(osm_class=OpenStreetMapClass.WATER).get_data(bbox).to_crs(crs).reset_index() - water_osm['Value'] = np.int8(20) + water_osm['Value'] = 20 # Roads @@ -59,7 +60,7 @@ def get_data(self, bbox): roads_osm['lanes'] = roads_osm['lanes'].fillna(roads_osm['avg_lanes']) # Add value field (30) - roads_osm['Value'] = np.int8(30) + roads_osm['Value'] = 30 # Buffer roads by lanes * 10 ft (3.048 m) # https://nacto.org/publication/urban-street-design-guide/street-design-elements/lane-width/#:~:text=wider%20lane%20widths.-,Lane%20widths%20of%2010%20feet%20are%20appropriate%20in%20urban%20areas,be%20used%20in%20each%20direction @@ -73,14 +74,14 @@ def get_data(self, bbox): ) else: # Add value field (30) - roads_osm['Value'] = np.int8(30) + roads_osm['Value'] = 30 # Building ulu_lulc_1m = BuildingClassifier().get_data_ulu(bbox, crs, esa_1m) anbh_1m = BuildingClassifier().get_data_anbh(bbox, esa_1m) # get building features - buildings = BuildingClassifier().get_data_buildings(bbox, crs) + buildings = OvertureBuildings().get_data(bbox).to_crs(crs) # extract ULU, ANBH, and Area_m buildings['ULU'] = exact_extract(ulu_lulc_1m, buildings, ["majority"], output='pandas')['majority'] buildings['ANBH'] = exact_extract(anbh_1m, buildings, ["mean"], output='pandas')['mean'] @@ -112,7 +113,7 @@ def get_data(self, bbox): # Parking parking_osm = OpenStreetMap(osm_class=OpenStreetMapClass.PARKING).get_data(bbox).to_crs(crs).reset_index() - parking_osm['Value'] = np.int8(50) + parking_osm['Value'] = 50 # combine features: open space, water, road, building, parking diff --git a/city_metrix/models/building_classifier/building_classifier.pkl b/city_metrix/models/building_classifier/building_classifier.pkl index d1cee215..4907a22b 100644 Binary files a/city_metrix/models/building_classifier/building_classifier.pkl and b/city_metrix/models/building_classifier/building_classifier.pkl differ diff --git a/city_metrix/models/building_classifier/building_classifier.py b/city_metrix/models/building_classifier/building_classifier.py index 22415096..816b690b 100644 --- a/city_metrix/models/building_classifier/building_classifier.py +++ b/city_metrix/models/building_classifier/building_classifier.py @@ -16,8 +16,6 @@ from ...layers.esa_world_cover import EsaWorldCover, EsaWorldCoverClass from ...layers.urban_land_use import UrbanLandUse from ...layers.average_net_building_height import AverageNetBuildingHeight -from ...layers.open_street_map import OpenStreetMap, OpenStreetMapClass -from ...layers.open_buildings import OpenBuildings class BuildingClassifier(Layer): @@ -52,8 +50,8 @@ def get_data_esa_reclass(self, bbox, crs): # Perform the reclassification reclassified_esa = reclassify(esa_world_cover, bins=list(reclass_map.keys()), new_values=list(reclass_map.values())) - # Convert to int8 and chunk the data for Dask processing - reclassified_esa = reclassified_esa.astype(np.int8).chunk({'x': 512, 'y': 512}) + # Chunk the data for Dask processing + reclassified_esa = reclassified_esa.chunk({'x': 512, 'y': 512}) reclassified_esa = reclassified_esa.rio.write_crs(esa_world_cover.rio.crs, inplace=True) @@ -81,8 +79,8 @@ def get_data_ulu(self, bbox, crs, snap_to): for from_val, to_val in mapping.items(): ulu_lulc = ulu_lulc.where(ulu_lulc != from_val, to_val) - # Convert to int8 and chunk the data for Dask processing - ulu_lulc = ulu_lulc.astype(np.int8).chunk({'x': 512, 'y': 512}) + # Chunk the data for Dask processing + ulu_lulc = ulu_lulc.chunk({'x': 512, 'y': 512}) ####### 1-Non-residential as default # 0-Unclassified as nodata @@ -111,28 +109,10 @@ def get_data_anbh(self, bbox, snap_to): return anbh_1m - def get_data_buildings(self, bbox, crs): - # OSM buildings - building_osm = OpenStreetMap(osm_class=OpenStreetMapClass.BUILDING).get_data(bbox).to_crs(crs).reset_index(drop=True) - # Google-Microsoft Open Buildings Dataset buildings - openbuilds = OpenBuildings(country='USA').get_data(bbox).to_crs(crs).reset_index(drop=True) - - # Intersect buildings and keep the open buildings that don't intersect OSM buildings - intersect_buildings = gpd.sjoin(building_osm, openbuilds, how='inner', predicate='intersects') - openbuilds_non_intersect = openbuilds.loc[~openbuilds.index.isin(intersect_buildings.index)] - - buildings = pd.concat([building_osm['geometry'], openbuilds_non_intersect['geometry']], ignore_index=True).reset_index() - # Get rid of any 3d geometries that cause a problem - buildings = buildings[~buildings['geometry'].apply(lambda geom: 'Z' in geom.geom_type)] - - # Value not start with 0 - buildings['Value'] = buildings['index'] + 1 - - return buildings def rasterize_polygon(self, gdf, snap_to): if gdf.empty: - raster = np.full(snap_to.shape, 0, dtype=np.int8) + raster = np.full(snap_to.shape, 0) raster = xr.DataArray(raster, dims=snap_to.dims, coords=snap_to.coords) return raster.rio.write_crs(snap_to.rio.crs, inplace=True) @@ -141,7 +121,7 @@ def rasterize_polygon(self, gdf, snap_to): vector_data=gdf, measurements=["Value"], like=snap_to, - fill=np.int8(0) + fill=0 ).Value return raster.rio.reproject_match(snap_to) @@ -169,7 +149,7 @@ def building_classifier_tree(self): # set classifier parameters clf = DecisionTreeClassifier(max_depth=5) # encode labels - buildings_sample['Slope_encoded'] = buildings_sample['Slope'].map({'low': np.int8(42), 'high': np.int8(40)}) + buildings_sample['Slope_encoded'] = buildings_sample['Slope'].map({'low': 42, 'high': 40}) # Select these rows for the training set build_train = buildings_sample[buildings_sample['Model']=='training'] diff --git a/environment.yml b/environment.yml index 04596084..37b823ce 100644 --- a/environment.yml +++ b/environment.yml @@ -21,7 +21,7 @@ dependencies: - geemap=0.32.0 - pip=23.3.1 - boto3=1.34.124 - - scikit-learn=1.5.0 + - scikit-learn=1.5.1 - exactextract=0.2.0.dev252 - pip: - overturemaps==0.6.0 diff --git a/setup.py b/setup.py index 179b5952..960c1f4e 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,6 @@ "boto3", "exactextract<=0.2.0.dev252", "overturemaps", - "scikit-learn>=1.5.0", + "scikit-learn>=1.5.1", ], )