Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

selectively backed out conversion to meters #90

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 32 additions & 20 deletions city_metrix/layers/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import shapely.geometry as geometry
import pandas as pd

MAX_TILE_SIDE_SIZE_METERS = 60000 # Approx. 0.5 degrees latitudinal offset. TODO How selected?
MAX_TILE_SIZE_DEGREES = 0.5 # TODO Why was this value selected?

class Layer:
def __init__(self, aggregate=None, masks=[]):
Expand Down Expand Up @@ -55,22 +55,22 @@ def groupby(self, zones, layer=None):
"""
return LayerGroupBy(self.aggregate, zones, layer, self.masks)

def write(self, bbox, output_path, tile_side_meters=None, tile_buffer_meters=None, **kwargs):
def write(self, bbox, output_path, tile_degrees=None, buffer_size=None, **kwargs):
"""
Write the layer to a path. Does not apply masks.

:param bbox: (min x, min y, max x, max y)
:param output_path: local or s3 path to output to
:param tile_side_meters: optional param to tile the results into multiple files specified as tile length on a side in meters
: param tile_buffer_meters: tile buffer distance in meters
:param tile_degrees: optional param to tile the results into multiple files specified as tile length on a side
:param buffer_size: tile buffer distance
:return:
"""

if tile_side_meters is None:
if tile_degrees is None:
clipped_data = self.aggregate.get_data(bbox)
write_layer(output_path, clipped_data)
else:
tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_side_meters, tile_buffer_meters)
tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_degrees, buffer_size)

# write raster data to files
if not os.path.exists(output_path):
Expand All @@ -92,13 +92,13 @@ def write(self, bbox, output_path, tile_side_meters=None, tile_buffer_meters=Non
_write_tile_grid(unbuffered_tile_grid, output_path, 'tile_grid_unbuffered.geojson')


def _get_tile_boundaries(bbox, tile_side_meters, tile_buffer_meters):
has_buffer = True if tile_buffer_meters is not None and tile_buffer_meters != 0 else False
def _get_tile_boundaries(bbox, tile_degrees, buffer_size):
has_buffer = True if buffer_size is not None and buffer_size != 0 else False
if has_buffer:
tiles = create_fishnet_grid(*bbox, tile_side_meters, tile_buffer_meters)
unbuffered_tiles = create_fishnet_grid(*bbox, tile_side_meters)
tiles = create_fishnet_grid(*bbox, tile_degrees, buffer_size)
unbuffered_tiles = create_fishnet_grid(*bbox, tile_degrees)
else:
tiles = create_fishnet_grid(*bbox, tile_side_meters)
tiles = create_fishnet_grid(*bbox, tile_degrees)
unbuffered_tiles = None

tile_grid = []
Expand Down Expand Up @@ -136,7 +136,7 @@ def count(self):
return self._zonal_stats("count")

def _zonal_stats(self, stats_func):
if box(*self.zones.total_bounds).area <= MAX_TILE_SIDE_SIZE_METERS**2:
if box(*self.zones.total_bounds).area <= MAX_TILE_SIZE_DEGREES**2:
stats = self._zonal_stats_tile(self.zones, [stats_func])
else:
stats = self._zonal_stats_fishnet(stats_func)
Expand All @@ -160,7 +160,7 @@ def group_layer_values(df):

def _zonal_stats_fishnet(self, stats_func):
# fishnet GeoDataFrame into smaller tiles
fishnet = create_fishnet_grid(*self.zones.total_bounds, MAX_TILE_SIDE_SIZE_METERS)
fishnet = create_fishnet_grid(*self.zones.total_bounds, MAX_TILE_SIZE_DEGREES)

# spatial join with fishnet grid and then intersect geometries with fishnet tiles
joined = self.zones.sjoin(fishnet)
Expand Down Expand Up @@ -257,17 +257,29 @@ def get_utm_zone_epsg(bbox) -> str:
return f"EPSG:{epsg}"


def create_fishnet_grid(min_lon, min_lat, max_lon, max_lat, tile_side_meters, tile_buffer_meters=0):
def create_fishnet_grid(min_lon, min_lat, max_lon, max_lat, cell_size, buffer_size=0, tile_units_in_degrees=True):
lon_coord, lat_coord = (min_lon, min_lat)
geom_array = []

center_lat = (min_lat + max_lat) / 2
lon_side_offset, lat_side_offset = offset_meters_to_geographic_degrees(center_lat, tile_side_meters)
if tile_buffer_meters == 0:
lon_buffer_offset = 0
lat_buffer_offset = 0
if tile_units_in_degrees:
if cell_size > 0.5:
raise Exception('Value for cell_size must be < 0.5 degrees.')

lon_side_offset = cell_size
lat_side_offset = cell_size
lon_buffer_offset = buffer_size
lat_buffer_offset = buffer_size
else:
lon_buffer_offset, lat_buffer_offset = offset_meters_to_geographic_degrees(center_lat, tile_buffer_meters)
if cell_size < 10:
raise Exception('Value for cell_size must be >= 10 meters.')

center_lat = (min_lat + max_lat) / 2
lon_side_offset, lat_side_offset = offset_meters_to_geographic_degrees(center_lat, cell_size)
if buffer_size == 0:
lon_buffer_offset = 0
lat_buffer_offset = 0
else:
lon_buffer_offset, lat_buffer_offset = offset_meters_to_geographic_degrees(center_lat, buffer_size)

# Polygon Size
while lat_coord < max_lat:
Expand Down
52 changes: 18 additions & 34 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,19 @@
from city_metrix.layers import Layer

import shapely.geometry as geometry
import geopandas as gpd
from city_metrix.layers.layer import create_fishnet_grid
from geocube.api.core import make_geocube


def create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size):
x, y = (min_x, min_y)
geom_array = []

# Polygon Size
while y < (max_y - 0.000001):
while x < (max_x - 0.000001):
geom = geometry.Polygon(
[
(x, y),
(x, y + cell_size),
(x + cell_size, y + cell_size),
(x + cell_size, y),
(x, y),
]
)
geom_array.append(geom)
x += cell_size
x = min_x
y += cell_size

fishnet = gpd.GeoDataFrame(geom_array, columns=["geometry"]).set_crs("EPSG:4326")
def test_create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size):
# Slightly reduce aoi to avoid partial cells
reduction = 0.000001
max_y = (max_y - reduction)
max_x = (max_x - reduction)
fishnet = create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size)
fishnet.drop('fishnet_geometry', axis=1, inplace=True)
return fishnet


# Test zones of a regular 0.01x0.01 grid over a 0.1x0.1 extent
ZONES = create_fishnet_grid(106.7, -6.3, 106.8, -6.2, 0.01).reset_index()
LARGE_ZONES = create_fishnet_grid(106, -7, 107, -6, 0.1).reset_index()
# Test zones of a regular 0.01x0.01 grid over a 0.1x0.1 extent by degrees
ZONES = test_create_fishnet_grid(106.7, -6.3, 106.8, -6.2, 0.01).reset_index()
LARGE_ZONES = test_create_fishnet_grid(106, -7, 107, -6, 0.1).reset_index()


class MockLayer(Layer):
Expand All @@ -54,7 +35,8 @@ class MockMaskLayer(Layer):
Simple layer where even indices are masked
"""
def get_data(self, bbox):
mask_gdf = create_fishnet_grid(*bbox, 0.01).reset_index()
cell_size_degrees = 0.01
mask_gdf = test_create_fishnet_grid(*bbox, cell_size_degrees).reset_index()
mask_gdf['index'] = mask_gdf['index'] % 2
mask = make_geocube(
vector_data=mask_gdf,
Expand All @@ -72,7 +54,8 @@ class MockGroupByLayer(Layer):
Simple categorical layer with alternating 1s and 2s
"""
def get_data(self, bbox):
group_by_gdf = create_fishnet_grid(*bbox, 0.001).reset_index()
cell_size_degrees = 0.001
group_by_gdf = test_create_fishnet_grid(*bbox, cell_size_degrees).reset_index()
group_by_gdf['index'] = (group_by_gdf['index'] % 2) + 1
group_by = make_geocube(
vector_data=group_by_gdf,
Expand Down Expand Up @@ -104,7 +87,8 @@ class MockLargeGroupByLayer(Layer):
"""

def get_data(self, bbox):
group_by_gdf = create_fishnet_grid(*bbox, 0.01).reset_index()
cell_size_degrees = 0.01
group_by_gdf = test_create_fishnet_grid(*bbox, cell_size_degrees).reset_index()
group_by_gdf['index'] = (group_by_gdf['index'] % 2) + 1
group_by = make_geocube(
vector_data=group_by_gdf,
Expand All @@ -113,4 +97,4 @@ def get_data(self, bbox):
output_crs=4326,
).index

return group_by
return group_by
Loading
Loading