diff --git a/city_metrix/layers/layer.py b/city_metrix/layers/layer.py index dce9603..4a96b7a 100644 --- a/city_metrix/layers/layer.py +++ b/city_metrix/layers/layer.py @@ -19,7 +19,7 @@ import shapely.geometry as geometry import pandas as pd -MAX_TILE_SIZE = 0.5 +MAX_TILE_SIDE_SIZE_METERS = 60000 # Approx. 0.5 degrees latitudinal offset. TODO How selected? class Layer: def __init__(self, aggregate=None, masks=[]): @@ -55,23 +55,22 @@ def groupby(self, zones, layer=None): """ return LayerGroupBy(self.aggregate, zones, layer, self.masks) - def write(self, bbox, output_path, tile_degrees=None, buffer_meters=None, **kwargs): + def write(self, bbox, output_path, tile_side_meters=None, tile_buffer_meters=None, **kwargs): """ Write the layer to a path. Does not apply masks. :param bbox: (min x, min y, max x, max y) :param output_path: local or s3 path to output to - :param tile_degrees: optional param to tile the results into multiple files with a VRT. - Degrees to tile by. `output_path` should be a folder path to store the tiles. - : param buffer_meters: tile buffer distance in meters + :param tile_side_meters: optional param to tile the results into multiple files specified as tile length on a side in meters + : param tile_buffer_meters: tile buffer distance in meters :return: """ - if tile_degrees is None: + if tile_side_meters is None: clipped_data = self.aggregate.get_data(bbox) write_layer(output_path, clipped_data) else: - tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_degrees, buffer_meters) + tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_side_meters, tile_buffer_meters) # write raster data to files if not os.path.exists(output_path): @@ -93,14 +92,13 @@ def write(self, bbox, output_path, tile_degrees=None, buffer_meters=None, **kwar _write_tile_grid(unbuffered_tile_grid, output_path, 'tile_grid_unbuffered.geojson') -def _get_tile_boundaries(bbox, tile_degrees, buffer_meters): - has_buffer = True if buffer_meters is not None and buffer_meters != 0 else False +def _get_tile_boundaries(bbox, tile_side_meters, tile_buffer_meters): + has_buffer = True if tile_buffer_meters is not None and tile_buffer_meters != 0 else False if has_buffer: - lon_degree_offset, lat_degree_offset = meters_to_offset_degrees(bbox, buffer_meters) - tiles = create_fishnet_grid(*bbox, tile_degrees, lon_degree_offset, lat_degree_offset) - unbuffered_tiles = create_fishnet_grid(*bbox, tile_degrees) + tiles = create_fishnet_grid(*bbox, tile_side_meters, tile_buffer_meters) + unbuffered_tiles = create_fishnet_grid(*bbox, tile_side_meters) else: - tiles = create_fishnet_grid(*bbox, tile_degrees) + tiles = create_fishnet_grid(*bbox, tile_side_meters) unbuffered_tiles = None tile_grid = [] @@ -138,7 +136,7 @@ def count(self): return self._zonal_stats("count") def _zonal_stats(self, stats_func): - if box(*self.zones.total_bounds).area <= MAX_TILE_SIZE**2: + if box(*self.zones.total_bounds).area <= MAX_TILE_SIDE_SIZE_METERS**2: stats = self._zonal_stats_tile(self.zones, [stats_func]) else: stats = self._zonal_stats_fishnet(stats_func) @@ -162,7 +160,7 @@ def group_layer_values(df): def _zonal_stats_fishnet(self, stats_func): # fishnet GeoDataFrame into smaller tiles - fishnet = create_fishnet_grid(*self.zones.total_bounds, MAX_TILE_SIZE) + fishnet = create_fishnet_grid(*self.zones.total_bounds, MAX_TILE_SIDE_SIZE_METERS) # spatial join with fishnet grid and then intersect geometries with fishnet tiles joined = self.zones.sjoin(fishnet) @@ -259,30 +257,39 @@ def get_utm_zone_epsg(bbox) -> str: return f"EPSG:{epsg}" -def create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size, lon_degree_buffer=0, lat_degree_buffer=0): - x, y = (min_x, min_y) +def create_fishnet_grid(min_lon, min_lat, max_lon, max_lat, tile_side_meters, tile_buffer_meters=0): + lon_coord, lat_coord = (min_lon, min_lat) geom_array = [] + center_lat = (min_lat + max_lat) / 2 + lon_side_offset, lat_side_offset = offset_meters_to_geographic_degrees(center_lat, tile_side_meters) + if tile_buffer_meters == 0: + lon_buffer_offset = 0 + lat_buffer_offset = 0 + else: + lon_buffer_offset, lat_buffer_offset = offset_meters_to_geographic_degrees(center_lat, tile_buffer_meters) + # Polygon Size - while y < max_y: - while x < max_x: - cell_min_x = x - lon_degree_buffer - cell_min_y = y - lat_degree_buffer - cell_max_x = x + cell_size + lon_degree_buffer - cell_max_y = y + cell_size + lat_degree_buffer + while lat_coord < max_lat: + while lon_coord < max_lon: + cell_min_lon = lon_coord - lon_buffer_offset + cell_min_lat = lat_coord - lat_buffer_offset + cell_max_lon = lon_coord + lon_side_offset + lon_buffer_offset + cell_max_lat = lat_coord + lat_side_offset + lat_buffer_offset geom = geometry.Polygon( [ - (cell_min_x, cell_min_y), - (cell_min_x, cell_max_y), - (cell_max_x, cell_max_y), - (cell_max_x, cell_min_y), - (cell_min_x, cell_min_y), + (cell_min_lon, cell_min_lat), + (cell_min_lon, cell_max_lat), + (cell_max_lon, cell_max_lat), + (cell_max_lon, cell_min_lat), + (cell_min_lon, cell_min_lat), ] ) geom_array.append(geom) - x += cell_size - x = min_x - y += cell_size + lon_coord += lon_side_offset + lon_coord = min_lon + + lat_coord += lat_side_offset fishnet = gpd.GeoDataFrame(geom_array, columns=["geometry"]).set_crs("EPSG:4326") fishnet["fishnet_geometry"] = fishnet["geometry"] @@ -367,14 +374,12 @@ def write_dataarray(path, data): else: data.rio.to_raster(raster_path=path, driver="COG") -def meters_to_offset_degrees(bbox, offset_meters): - min_lat = bbox[1] - max_lat = bbox[3] - center_lat = (min_lat + max_lat) / 2 +def offset_meters_to_geographic_degrees(decimal_latitude, length_m): + # TODO consider replacing this spherical calculation with ellipsoidal + earth_radius_m = 6378137 + rad = 180/math.pi - meters_per_degree_of_lat = 111111 - earth_circumference_meters = 40075000 - lon_degree_offset = offset_meters/ (earth_circumference_meters * math.cos(center_lat) / 360) - lat_degree_offset = offset_meters / meters_per_degree_of_lat + lon_degree_offset = abs((length_m / (earth_radius_m * math.cos(math.pi*decimal_latitude/180))) * rad) + lat_degree_offset = abs((length_m / earth_radius_m) * rad) - return abs(lon_degree_offset), abs(lat_degree_offset) + return lon_degree_offset, lat_degree_offset diff --git a/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py b/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py index c31c796..1467cd0 100644 --- a/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py +++ b/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py @@ -31,7 +31,7 @@ def test_write_albedo(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'albedo.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo()) - Albedo(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + Albedo(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') @@ -39,63 +39,65 @@ def test_write_albedo_tiled_unbuffered(target_folder, bbox_info, target_spatial_ file_path = prep_output_path(target_folder, 'albedo_tiled.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo()) (Albedo(spatial_resolution=target_resolution). - write(bbox_info.bounds, file_path, tile_degrees=0.01, buffer_meters=None)) + write(bbox_info.bounds, file_path, tile_side_meters=1000, tile_buffer_meters=None)) file_count = get_file_count_in_folder(file_path) - assert file_count == 5 + expected_file_count = 7 # includes 6 tiles and one geojson file + assert file_count == expected_file_count @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_albedo_tiled_buffered(target_folder, bbox_info, target_spatial_resolution_multiplier): buffer_meters = 500 - file_path = prep_output_path(target_folder, 'albedo_tiled.tif') + file_path = prep_output_path(target_folder, 'albedo_tiled_buffered.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo()) (Albedo(spatial_resolution=target_resolution). - write(bbox_info.bounds, file_path, tile_degrees=0.01, buffer_meters=buffer_meters)) + write(bbox_info.bounds, file_path, tile_side_meters=1000, tile_buffer_meters=buffer_meters)) file_count = get_file_count_in_folder(file_path) - assert file_count == 6 + expected_file_count = 8 # includes 6 tiles and two geojson files + assert file_count == expected_file_count @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_alos_dsm(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'alos_dsm.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(AlosDSM()) - AlosDSM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + AlosDSM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_average_net_building_height(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'average_net_building_height.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(AverageNetBuildingHeight()) - AverageNetBuildingHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + AverageNetBuildingHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_esa_world_cover(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'esa_world_cover.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(EsaWorldCover()) - EsaWorldCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + EsaWorldCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_high_land_surface_temperature(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'high_land_surface_temperature.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(HighLandSurfaceTemperature()) - HighLandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + HighLandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_impervious_surface(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'impervious_surface.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(ImperviousSurface()) - LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_land_surface_temperature(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'land_surface_temperature.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(LandSurfaceTemperature()) - LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) # TODO Class is no longer used, but may be useful later @@ -110,27 +112,27 @@ def test_write_land_surface_temperature(target_folder, bbox_info, target_spatial def test_write_nasa_dem(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'nasa_dem.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NasaDEM()) - NasaDEM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + NasaDEM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_natural_areas(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'natural_areas.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NaturalAreas()) - NaturalAreas(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + NaturalAreas(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_ndvi_sentinel2_gee(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'ndvi_sentinel2_gee.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NdviSentinel2()) - NdviSentinel2(year=2023, spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + NdviSentinel2(year=2023, spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_openbuildings(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'open_buildings.geojson') - OpenBuildings(bbox_info.country).write(bbox_info.bounds, file_path, tile_degrees=None) + OpenBuildings(bbox_info.country).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) # TODO Class write is not functional. Is class still needed or have we switched to overture? @@ -143,7 +145,7 @@ def test_write_openbuildings(target_folder, bbox_info, target_spatial_resolution @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_overture_buildings(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'overture_buildings.geojson') - OvertureBuildings().write(bbox_info.bounds, file_path, tile_degrees=None) + OvertureBuildings().write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) # TODO Class is no longer used, but may be useful later @@ -158,33 +160,33 @@ def test_write_overture_buildings(target_folder, bbox_info, target_spatial_resol def test_write_smart_surface_lulc(target_folder, bbox_info, target_spatial_resolution_multiplier): # Note: spatial_resolution not implemented for this raster class file_path = prep_output_path(target_folder, 'smart_surface_lulc.tif') - SmartSurfaceLULC().write(bbox_info.bounds, file_path, tile_degrees=None) + SmartSurfaceLULC().write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_tree_canopy_height(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'tree_canopy_height.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(TreeCanopyHeight()) - TreeCanopyHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + TreeCanopyHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_tree_cover(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'tree_cover.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(TreeCover()) - TreeCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + TreeCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_urban_land_use(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'urban_land_use.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(UrbanLandUse()) - UrbanLandUse(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + UrbanLandUse(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_world_pop(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'world_pop.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(WorldPop()) - WorldPop(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) + WorldPop(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) assert verify_file_is_populated(file_path) diff --git a/tests/test_methods.py b/tests/test_methods.py index 470a450..bb7ad39 100644 --- a/tests/test_methods.py +++ b/tests/test_methods.py @@ -1,5 +1,5 @@ import numpy as np -from city_metrix.layers.layer import create_fishnet_grid, meters_to_offset_degrees +from city_metrix.layers.layer import create_fishnet_grid, offset_meters_to_geographic_degrees from .conftest import ( LARGE_ZONES, ZONES, @@ -38,27 +38,23 @@ def test_fishnet(): min_y = -80.1 max_x = -38.2 max_y = -80.0 - cell_size = 0.01 - lon_degree_buffer = 0.004 - lat_degree_buffer = 0.004 + tile_side_meters = 1000 + tile_buffer_meters = 100 result_fishnet = ( - create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size, lon_degree_buffer, lat_degree_buffer)) + create_fishnet_grid(min_x, min_y, max_x, max_y, tile_side_meters, tile_buffer_meters)) actual_count = result_fishnet.geometry.count() - expected_count = 110 + expected_count = 24 assert actual_count == expected_count -def test_meters_to_offset_degrees(): - # random high-latitude location where long offset is > lat offset - bbox = (-38.355, -82.821, -38.338, -82.804) - offset_meters = 500 - lon_degree_offset, lat_degree_offset = meters_to_offset_degrees(bbox, offset_meters) - round_lon_degree_offset = round(lon_degree_offset, 4) - round_lat_degree_offset = round(lat_degree_offset, 4) +def test_meters_to_offset_degrees(): + decimal_latitude = 45 + length_m = 100 + lon_degree_offset, lat_degree_offset = offset_meters_to_geographic_degrees(decimal_latitude, length_m) - assert round_lon_degree_offset > round_lat_degree_offset - assert round_lon_degree_offset == 0.0106 + assert round(lon_degree_offset,5) == 0.00127 + assert round(lat_degree_offset,5) == 0.0009 def test_masks():