Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cif 292 add optional buffering of tiles #85

Merged
merged 7 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 72 additions & 29 deletions city_metrix/layers/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import ee
import boto3
import math
from dask.diagnostics import ProgressBar
from ee import ImageCollection
from geocube.api.core import make_geocube
Expand Down Expand Up @@ -54,48 +55,74 @@ def groupby(self, zones, layer=None):
"""
return LayerGroupBy(self.aggregate, zones, layer, self.masks)

def write(self, bbox, output_path, tile_degrees=None, **kwargs):
def write(self, bbox, output_path, tile_degrees=None, buffer_meters=None, **kwargs):
chrowe marked this conversation as resolved.
Show resolved Hide resolved
"""
Write the layer to a path. Does not apply masks.

:param bbox: (min x, min y, max x, max y)
:param output_path: local or s3 path to output to
:param tile_degrees: optional param to tile the results into multiple files with a VRT.
Degrees to tile by. `output_path` should be a folder path to store the tiles.
: param buffer_meters: tile buffer distance in meters
:return:
"""

if tile_degrees is not None:
tiles = create_fishnet_grid(*bbox, tile_degrees)
if tile_degrees is None:
clipped_data = self.aggregate.get_data(bbox)
write_layer(output_path, clipped_data)
else:
tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_degrees, buffer_meters)

# write raster data to files
if not os.path.exists(output_path):
os.makedirs(output_path)

file_names = []
tile_serial_id = 1
tile_dict = {"tile_name": [], "geometry": []}
for tile in tiles["geometry"]:
data = self.aggregate.get_data(tile.bounds)
for tile in tile_grid:
tile_name = tile['tile_name']
tile_geom = tile['geometry']

tile_suffix = str(tile_serial_id).zfill(3)
file_name = f'tile_{tile_suffix}.tif'
file_path = os.path.join(output_path, file_name)
file_names.append(file_path)
write_layer(file_path, data)
file_path = os.path.join(output_path, tile_name)
clipped_data = self.aggregate.get_data(tile_geom.bounds)
write_layer(file_path, clipped_data)

tile_dict['tile_name'].append(file_name)
tile_dict['geometry'].append(tile)
# write tile grid to geojson file
_write_tile_grid(tile_grid, output_path, 'tile_grid.geojson')

tile_serial_id += 1
# if tiles were buffered, also write unbuffered tile grid to geojson file
if unbuffered_tile_grid:
_write_tile_grid(unbuffered_tile_grid, output_path, 'tile_grid_unbuffered.geojson')

# Write tile polygons to geojson file
tile_grid_gdf = gpd.GeoDataFrame(tile_dict, crs='EPSG:4326')
tile_grid_file_path = os.path.join(output_path, 'tile_grid.geojson')
tile_grid_gdf.to_file(tile_grid_file_path)
else:
data = self.aggregate.get_data(bbox)
write_layer(output_path, data)

def _get_tile_boundaries(bbox, tile_degrees, buffer_meters):
has_buffer = True if buffer_meters is not None and buffer_meters != 0 else False
if has_buffer:
lon_degree_offset, lat_degree_offset = meters_to_offset_degrees(bbox, buffer_meters)
tiles = create_fishnet_grid(*bbox, tile_degrees, lon_degree_offset, lat_degree_offset)
unbuffered_tiles = create_fishnet_grid(*bbox, tile_degrees)
else:
tiles = create_fishnet_grid(*bbox, tile_degrees)
unbuffered_tiles = None

tile_grid = []
unbuffered_tile_grid = []
for index in range(0, len(tiles)):
tile_serial_id = index + 1
tile_suffix = str(tile_serial_id).zfill(3)
tile_name = f'tile_{tile_suffix}.tif'

tile_geom = tiles.iloc[index]['geometry']
tile_grid.append({"tile_name": tile_name, "geometry": tile_geom})

if has_buffer:
unbuffered_tile_geom = unbuffered_tiles.iloc[index]['geometry']
unbuffered_tile_grid.append({"tile_name": tile_name, "geometry": unbuffered_tile_geom})

return tile_grid, unbuffered_tile_grid

def _write_tile_grid(tile_grid, output_path, target_file_name):
tile_grid = gpd.GeoDataFrame(tile_grid, crs='EPSG:4326')
tile_grid_file_path = str(os.path.join(output_path, target_file_name))
tile_grid.to_file(tile_grid_file_path)

class LayerGroupBy:
def __init__(self, aggregate, zones, layer=None, masks=[]):
Expand Down Expand Up @@ -232,20 +259,24 @@ def get_utm_zone_epsg(bbox) -> str:
return f"EPSG:{epsg}"


def create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size):
def create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size, lon_degree_buffer=0, lat_degree_buffer=0):
x, y = (min_x, min_y)
geom_array = []

# Polygon Size
while y < max_y:
while x < max_x:
cell_min_x = x - lon_degree_buffer
cell_min_y = y - lat_degree_buffer
cell_max_x = x + cell_size + lon_degree_buffer
cell_max_y = y + cell_size + lat_degree_buffer
geom = geometry.Polygon(
[
(x, y),
(x, y + cell_size),
(x + cell_size, y + cell_size),
(x + cell_size, y),
(x, y),
(cell_min_x, cell_min_y),
(cell_min_x, cell_max_y),
(cell_max_x, cell_max_y),
(cell_max_x, cell_min_y),
(cell_min_x, cell_min_y),
]
)
geom_array.append(geom)
Expand Down Expand Up @@ -335,3 +366,15 @@ def write_dataarray(path, data):
os.remove(tmp_path)
else:
data.rio.to_raster(raster_path=path, driver="COG")

def meters_to_offset_degrees(bbox, offset_meters):
min_lat = bbox[1]
max_lat = bbox[3]
center_lat = (min_lat + max_lat) / 2

meters_per_degree_of_lat = 111111
earth_circumference_meters = 40075000
lon_degree_offset = offset_meters/ (earth_circumference_meters * math.cos(center_lat) / 360)
lat_degree_offset = offset_meters / meters_per_degree_of_lat

return abs(lon_degree_offset), abs(lat_degree_offset)
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,10 @@ def prep_output_path(output_folder, file_name):
def verify_file_is_populated(file_path):
is_populated = True if os.path.getsize(file_path) > 0 else False
return is_populated

def get_file_count_in_folder(dir_path):
count = 0
for path in os.scandir(dir_path):
if path.is_file():
count += 1
return count
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
UrbanLandUse,
WorldPop, Layer, ImperviousSurface
)
from .conftest import RUN_DUMPS, prep_output_path, verify_file_is_populated
from .conftest import RUN_DUMPS, prep_output_path, verify_file_is_populated, get_file_count_in_folder
from ...tools.general_tools import get_class_default_spatial_resolution


Expand All @@ -35,11 +35,26 @@ def test_write_albedo(target_folder, bbox_info, target_spatial_resolution_multip
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_albedo_tiled(target_folder, bbox_info, target_spatial_resolution_multiplier):
def test_write_albedo_tiled_unbuffered(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'albedo_tiled.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo())
Albedo(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=0.01)
assert verify_file_is_populated(file_path)
(Albedo(spatial_resolution=target_resolution).
write(bbox_info.bounds, file_path, tile_degrees=0.01, buffer_meters=None))
file_count = get_file_count_in_folder(file_path)

assert file_count == 5

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_albedo_tiled_buffered(target_folder, bbox_info, target_spatial_resolution_multiplier):
buffer_meters = 500
file_path = prep_output_path(target_folder, 'albedo_tiled.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo())
(Albedo(spatial_resolution=target_resolution).
write(bbox_info.bounds, file_path, tile_degrees=0.01, buffer_meters=buffer_meters))
file_count = get_file_count_in_folder(file_path)

assert file_count == 6


@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_alos_dsm(target_folder, bbox_info, target_spatial_resolution_multiplier):
Expand Down
30 changes: 28 additions & 2 deletions tests/test_methods.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numpy as np

from city_metrix.layers.layer import create_fishnet_grid, meters_to_offset_degrees
from .conftest import (
LARGE_ZONES,
ZONES,
Expand All @@ -16,7 +16,6 @@ def test_count():
assert counts.size == 100
assert all([count == 100 for count in counts])


def test_mean():
means = MockLayer().groupby(ZONES).mean()
assert means.size == 100
Expand All @@ -34,6 +33,33 @@ def test_fishnetted_mean():
assert means.size == 100
assert all([mean == i for i, mean in enumerate(means)])

def test_fishnet():
min_x = -38.3
min_y = -80.1
max_x = -38.2
max_y = -80.0
cell_size = 0.01
lon_degree_buffer = 0.004
lat_degree_buffer = 0.004
result_fishnet = (
create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size, lon_degree_buffer, lat_degree_buffer))

actual_count = result_fishnet.geometry.count()
expected_count = 110
assert actual_count == expected_count

def test_meters_to_offset_degrees():
# random high-latitude location where long offset is > lat offset
bbox = (-38.355, -82.821, -38.338, -82.804)
offset_meters = 500
lon_degree_offset, lat_degree_offset = meters_to_offset_degrees(bbox, offset_meters)

round_lon_degree_offset = round(lon_degree_offset, 4)
round_lat_degree_offset = round(lat_degree_offset, 4)

assert round_lon_degree_offset > round_lat_degree_offset
assert round_lon_degree_offset == 0.0106


def test_masks():
counts = MockLayer().mask(MockMaskLayer()).groupby(ZONES).count()
Expand Down
Loading