
Commit

Merge branch 'main' of https://github.com/wri/cities-cif into add-indicator-percentpop-euclidean-proximity-to-openspace
weiqi-tori committed Dec 25, 2024
2 parents 0cf7928 + df3e021 commit b963a59
Showing 36 changed files with 927 additions and 299 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -43,7 +43,7 @@ To run the module,

1. You need access to Google Earth Engine
2. Install <https://cloud.google.com/sdk/docs/install>
3. If you want to use the ERA5 layer, you need to install the [Climate Data Store (CDS) Application Program Interface (API)](https://cds.climate.copernicus.eu/api-how-to)
3. If you want to use the ERA5 layer, you need to install the [Climate Data Store (CDS) Application Program Interface (API)](https://cds.climate.copernicus.eu/how-to-api)

### Interactive development

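
The linked page documents the new CDS API setup. A minimal sketch of the credentials file it asks for — assuming the post-2024 CDS infrastructure, where a personal access token replaces the old UID:key pair:

# Sketch: write ~/.cdsapirc so cdsapi.Client() can authenticate against the new CDS API.
# The token value is a placeholder; use the one shown on your CDS profile page.
from pathlib import Path

(Path.home() / ".cdsapirc").write_text(
    "url: https://cds.climate.copernicus.eu/api\n"
    "key: <your-personal-access-token>\n"
)
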
5 changes: 5 additions & 0 deletions city_metrix/layers/__init__.py
@@ -21,3 +21,8 @@
from .nasa_dem import NasaDEM
from .era_5_hottest_day import Era5HottestDay
from .impervious_surface import ImperviousSurface
from .glad_lulc import LandCoverGlad
from .glad_lulc import LandCoverSimplifiedGlad
from .glad_lulc import LandCoverHabitatGlad
from .glad_lulc import LandCoverHabitatChangeGlad
from .cams import Cams
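
With these re-exports in place, the new layers can be imported directly from the package namespace, e.g.:

# Usage sketch based on the exports added above.
from city_metrix.layers import Cams, LandCoverGlad, LandCoverHabitatChangeGlad
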
87 changes: 63 additions & 24 deletions city_metrix/layers/albedo.py
@@ -2,25 +2,29 @@
import xarray
from dask.diagnostics import ProgressBar

from .layer import Layer, get_utm_zone_epsg, get_image_collection
from .layer import Layer, get_utm_zone_epsg, get_image_collection, set_resampling_for_continuous_raster


class Albedo(Layer):
"""
Attributes:
start_date: starting date for data retrieval
end_date: ending date for data retrieval
spatial_resolution: raster resolution in meters (see https://github.com/stac-extensions/raster)
resampling_method: interpolation method used by Google Earth Engine. Default is 'bilinear'; options are 'bilinear', 'bicubic', or None.
threshold: threshold value for filtering the retrieval
"""

def __init__(self, start_date="2021-01-01", end_date="2022-01-01", spatial_resolution=10, threshold=None, **kwargs):
def __init__(self, start_date="2021-01-01", end_date="2022-01-01", spatial_resolution:int=10,
resampling_method:str='bilinear', threshold=None, **kwargs):
super().__init__(**kwargs)
self.start_date = start_date
self.end_date = end_date
self.spatial_resolution = spatial_resolution
self.resampling_method = resampling_method
self.threshold = threshold

def get_data(self, bbox):
def get_data(self, bbox: tuple[float, float, float, float]):
S2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
S2C = ee.ImageCollection("COPERNICUS/S2_CLOUD_PROBABILITY")

@@ -34,19 +38,33 @@ def get_data(self, bbox):
def mask_and_count_clouds(s2wc, geom):
s2wc = ee.Image(s2wc)
geom = ee.Geometry(geom.geometry())
is_cloud = ee.Image(s2wc.get('cloud_mask')).gt(MAX_CLOUD_PROB).rename('is_cloud')
is_cloud = (ee.Image(s2wc.get('cloud_mask'))
.gt(MAX_CLOUD_PROB)
.rename('is_cloud')
)

nb_cloudy_pixels = is_cloud.reduceRegion(
reducer=ee.Reducer.sum().unweighted(),
geometry=geom,
scale=self.spatial_resolution,
maxPixels=1e9
)
return s2wc.updateMask(is_cloud.eq(0)).set('nb_cloudy_pixels',
nb_cloudy_pixels.getNumber('is_cloud')).divide(10000)
mask = (s2wc
.updateMask(is_cloud.eq(0))
.set('nb_cloudy_pixels',nb_cloudy_pixels.getNumber('is_cloud'))
.divide(10000)
)

return mask

def mask_clouds_and_rescale(im):
clouds = ee.Image(im.get('cloud_mask')).select('probability')
return im.updateMask(clouds.lt(MAX_CLOUD_PROB)).divide(10000)
clouds = ee.Image(im.get('cloud_mask')
).select('probability')
mask = im.updateMask(clouds
.lt(MAX_CLOUD_PROB)
).divide(10000)

return mask

def get_masked_s2_collection(roi, start, end):
criteria = (ee.Filter.And(
@@ -55,23 +73,29 @@ def get_masked_s2_collection(roi, start, end):
))
s2 = S2.filter(criteria) # .select('B2','B3','B4','B8','B11','B12')
s2c = S2C.filter(criteria)
s2_with_clouds = (ee.Join.saveFirst('cloud_mask').apply(**{
'primary': ee.ImageCollection(s2),
'secondary': ee.ImageCollection(s2c),
'condition': ee.Filter.equals(**{'leftField': 'system:index', 'rightField': 'system:index'})
}))
s2_with_clouds = (
ee.Join.saveFirst('cloud_mask')
.apply(**{
'primary': ee.ImageCollection(s2),
'secondary': ee.ImageCollection(s2c),
'condition': ee.Filter.equals(**{'leftField': 'system:index', 'rightField': 'system:index'})
})
)

def _mcc(im):
return mask_and_count_clouds(im, roi)
# s2_with_clouds=ee.ImageCollection(s2_with_clouds).map(_mcc)

# s2_with_clouds=s2_with_clouds.limit(image_limit,'nb_cloudy_pixels')
s2_with_clouds = ee.ImageCollection(s2_with_clouds).map(
mask_clouds_and_rescale) # .limit(image_limit,'CLOUDY_PIXEL_PERCENTAGE')
return ee.ImageCollection(s2_with_clouds)
s2_with_clouds = (ee.ImageCollection(s2_with_clouds)
.map(mask_clouds_and_rescale)
) # .limit(image_limit,'CLOUDY_PIXEL_PERCENTAGE')

# calculate albedo for images
s2_with_clouds_ic = ee.ImageCollection(s2_with_clouds)

return s2_with_clouds_ic

# calculate albedo for images
# weights derived from
# S. Bonafoni and A. Sekertekin, "Albedo Retrieval From Sentinel-2 by New Narrow-to-Broadband Conversion Coefficients," in IEEE Geoscience and Remote Sensing Letters, vol. 17, no. 9, pp. 1618-1622, Sept. 2020, doi: 10.1109/LGRS.2020.2967085.
def calc_s2_albedo(image):
@@ -89,20 +113,35 @@ def calc_s2_albedo(image):
'SWIR1': image.select('B11'),
'SWIR2': image.select('B12')
}
return image.expression(S2_ALBEDO_EQN, config).double().rename('albedo')

albedo = image.expression(S2_ALBEDO_EQN, config).double().rename('albedo')

return albedo

## S2 MOSAIC AND ALBEDO
dataset = get_masked_s2_collection(ee.Geometry.BBox(*bbox), self.start_date, self.end_date)
s2_albedo = dataset.map(calc_s2_albedo)
albedo_mean = s2_albedo.reduce(ee.Reducer.mean())

data = (get_image_collection(
ee.ImageCollection(albedo_mean), bbox, self.spatial_resolution, "albedo")
.albedo_mean)
albedo_mean = (s2_albedo
.map(lambda x:
set_resampling_for_continuous_raster(x,
self.resampling_method,
self.spatial_resolution,
bbox
)
)
.reduce(ee.Reducer.mean())
)

albedo_mean_ic = ee.ImageCollection(albedo_mean)
data = get_image_collection(
albedo_mean_ic,
bbox,
self.spatial_resolution,
"albedo"
).albedo_mean

if self.threshold is not None:
return data.where(data < self.threshold)

return data
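
A usage sketch for the new signature; the bbox coordinates are illustrative, and the resampling_method values come from the docstring above:

# Sketch: mean albedo with bicubic resampling, masking values at or above 0.35.
# Assumes ee.Initialize() has already run (see README setup).
albedo = Albedo(start_date="2021-01-01", end_date="2022-01-01",
                spatial_resolution=10, resampling_method="bicubic", threshold=0.35)
data = albedo.get_data(bbox=(-38.35, -12.99, -38.33, -12.97))
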


39 changes: 29 additions & 10 deletions city_metrix/layers/alos_dsm.py
@@ -2,26 +2,45 @@
import xee
import xarray as xr

from .layer import Layer, get_image_collection

from .layer import Layer, get_image_collection, set_resampling_for_continuous_raster


class AlosDSM(Layer):
"""
Attributes:
spatial_resolution: raster resolution in meters (see https://github.com/stac-extensions/raster)
resampling_method: interpolation method used by Google Earth Engine. Default is 'bilinear'; options are 'bilinear', 'bicubic', or None.
"""

def __init__(self, spatial_resolution=30, **kwargs):
def __init__(self, spatial_resolution:int=30, resampling_method:str='bilinear', **kwargs):
super().__init__(**kwargs)
self.spatial_resolution = spatial_resolution
self.resampling_method = resampling_method

def get_data(self, bbox: tuple[float, float, float, float]):
alos_dsm = ee.ImageCollection("JAXA/ALOS/AW3D30/V3_2")

alos_dsm_ic = ee.ImageCollection(
alos_dsm
.filterBounds(ee.Geometry.BBox(*bbox))
.select('DSM')
.map(lambda x:
set_resampling_for_continuous_raster(x,
self.resampling_method,
self.spatial_resolution,
bbox
)
)
.mean()
)


def get_data(self, bbox):
dataset = ee.ImageCollection("JAXA/ALOS/AW3D30/V3_2")
alos_dsm = ee.ImageCollection(dataset
.filterBounds(ee.Geometry.BBox(*bbox))
.select('DSM')
.mean()
)
data = get_image_collection(alos_dsm, bbox, self.spatial_resolution, "ALOS DSM").DSM
data = get_image_collection(
alos_dsm_ic,
bbox,
self.spatial_resolution,
"ALOS DSM"
).DSM

return data
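
Both this layer and Albedo now route images through set_resampling_for_continuous_raster before reduction. Its definition lives in layer.py, whose diff is not shown in this excerpt; a plausible sketch of its shape, under the assumption that it applies ee.Image.resample for the interpolated modes and leaves the image untouched for None:

import ee

def set_resampling_for_continuous_raster(image, resampling_method, resolution, bbox):
    # Assumed behavior: 'bilinear' and 'bicubic' switch the image away from Earth
    # Engine's default nearest-neighbor resampling; None skips resampling entirely.
    # resolution and bbox are accepted to match the call sites above but are unused
    # in this sketch.
    if resampling_method is not None:
        image = image.resample(resampling_method)
    return image
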
14 changes: 10 additions & 4 deletions city_metrix/layers/average_net_building_height.py
@@ -5,6 +5,7 @@

from .layer import Layer, get_utm_zone_epsg, get_image_collection


class AverageNetBuildingHeight(Layer):
"""
Attributes:
@@ -23,8 +24,13 @@ def get_data(self, bbox):
# GLOBE - ee.Image("projects/wri-datalab/GHSL/GHS-BUILT-H-ANBH_GLOBE_R2023A")

anbh = ee.Image("projects/wri-datalab/GHSL/GHS-BUILT-H-ANBH_GLOBE_R2023A")
data = (get_image_collection(
ee.ImageCollection(anbh), bbox, self.spatial_resolution, "average net building height")
.b1)


anbh_ic = ee.ImageCollection(anbh)
data = get_image_collection(
anbh_ic,
bbox,
self.spatial_resolution,
"average net building height"
).b1

return data
13 changes: 10 additions & 3 deletions city_metrix/layers/built_up_height.py
@@ -22,8 +22,15 @@ def get_data(self, bbox):
# ANBH is the average height of the built surfaces, USE THIS
# AGBH is the amount of built cubic meters per surface unit in the cell
# ee.ImageCollection("projects/wri-datalab/GHSL/GHS-BUILT-H-ANBH_R2023A")

built_height = ee.Image("JRC/GHSL/P2023A/GHS_BUILT_H/2018")
data = get_image_collection(ee.ImageCollection(built_height), bbox, self.spatial_resolution, "built up height")
return data.built_height

built_height_ic = ee.ImageCollection(built_height)
data = get_image_collection(
built_height_ic,
bbox,
self.spatial_resolution,
"built up height"
).built_height

return data
98 changes: 98 additions & 0 deletions city_metrix/layers/cams.py
@@ -0,0 +1,98 @@
import cdsapi
import os
import xarray as xr
import zipfile

from .layer import Layer


class Cams(Layer):
def __init__(self, start_date="2023-01-01", end_date="2023-12-31", **kwargs):
super().__init__(**kwargs)
self.start_date = start_date
self.end_date = end_date

def get_data(self, bbox):
min_lon, min_lat, max_lon, max_lat = bbox

c = cdsapi.Client()
c.retrieve(
'cams-global-reanalysis-eac4',
{
'variable': [
"2m_temperature", "mean_sea_level_pressure",
"particulate_matter_2.5um", "particulate_matter_10um",
"carbon_monoxide", "nitrogen_dioxide", "ozone", "sulphur_dioxide"
],
"model_level": ["60"],
"date": [f"{self.start_date}/{self.end_date}"],
'time': ['00:00', '03:00', '06:00', '09:00', '12:00', '15:00', '18:00', '21:00'],
'area': [max_lat, min_lon, min_lat, max_lon],
'data_format': 'netcdf_zip',
},
'cams_download.zip')

# If data files from earlier runs were not deleted, save new files with appended numbers
existing_cams_downloads = [fname for fname in os.listdir('.') if fname.startswith('cams_download') and not fname.endswith('.zip')]
num_id = len(existing_cams_downloads)
while f'cams_download_{num_id}' in existing_cams_downloads:
num_id += 1
fname = f'cams_download{"" if num_id == 0 else f"_{num_id}"}'
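# Worked example: with 'cams_download' and 'cams_download_1' already on disk,
# num_id starts at 2, 'cams_download_2' is free, so the new directory becomes
# 'cams_download_2'.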
os.makedirs(fname, exist_ok=True)

# extract the ZIP file
with zipfile.ZipFile('cams_download.zip', 'r') as zip_ref:
# Extract all the contents of the ZIP file to the specified directory
zip_ref.extractall(fname)

# load netcdf files
dataarray_list = []
for nc_file in os.listdir(fname):
with xr.open_dataset(f'{fname}/{nc_file}') as dataarray:
dataarray_list.append(dataarray)

# not all variables have 'model_level'; drop that dimension so the datasets can be merged
dataarray_list = [
dataarray.squeeze(dim='model_level').drop_vars(['model_level'])
if 'model_level' in dataarray.dims
else dataarray
for dataarray in dataarray_list
]
# assign coordinates from the last dataarray to fix 1) the 0-360 degree longitude system and 2) slightly different lat/lon values
dataarray_list = [
dataarray.assign_coords(dataarray_list[-1].coords)
for dataarray in dataarray_list
]
data = xr.merge(dataarray_list)

# unit conversion
# particulate matter: concentration * 10^9
# target unit is ug/m3
for var in ['pm2p5', 'pm10']:
data[var].values = data[var].values * (10 ** 9)
# other: concentration x pressure / (287.058 * temp) * 10^9
# target unit is ug/m3
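# (CAMS gas fields are mass mixing ratios in kg/kg; multiplying by air density
# rho = p / (R_specific * T), with R_specific = 287.058 J/(kg*K) for dry air,
# converts kg/kg to kg/m3, and the 10^9 factor rescales kg/m3 to ug/m3.)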
for var in ['co', 'no2', 'go3', 'so2']:
data[var].values = data[var].values * data['msl'].values / (287.058 * data['t2m'].values) * (10 ** 9)

# drop pressure and temperature
data = data.drop_vars(['msl', 't2m'])
# xarray.Dataset to xarray.DataArray
data = data.to_array()

# Remove local files
os.remove('cams_download.zip')
# Workaround for elusive permission error
try:
for nc_file in os.listdir(fname):
os.remove(f'{fname}/{nc_file}')
os.rmdir(fname)
except OSError:
pass

# Select the nearest data point based on latitude and longitude
center_lon = (min_lon + max_lon) / 2
center_lat = (min_lat + max_lat) / 2
data = data.sel(latitude=center_lat, longitude=center_lon, method="nearest")

return data
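
A usage sketch; retrieval goes through cdsapi, so valid Copernicus (CDS/ADS) API credentials in ~/.cdsapirc are required (see the README change above), and the bbox values are illustrative:

# Sketch: pull 2023 CAMS reanalysis for a small bbox and inspect PM2.5.
cams = Cams(start_date="2023-01-01", end_date="2023-12-31")
aq = cams.get_data(bbox=(-38.35, -12.99, -38.33, -12.97))
print(aq.sel(variable="pm2p5"))  # ug/m3 after the conversions above
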
11 changes: 5 additions & 6 deletions city_metrix/layers/era_5_hottest_day.py
@@ -96,12 +96,11 @@ def hourly_mean_temperature(image):
},
f'download_{i}.nc')

dataarray = xr.open_dataset(f'download_{i}.nc')

# Subset times for the day
times = [valid_time.astype('datetime64[s]').astype(datetime).replace(tzinfo=pytz.UTC) for valid_time in dataarray['valid_time'].values]
indices = [i for i, value in enumerate(times) if value in utc_times]
subset_dataarray = dataarray.isel(valid_time=indices)
with xr.open_dataset(f'download_{i}.nc') as dataarray:
# Subset times for the day
times = [valid_time.astype('datetime64[s]').astype(datetime).replace(tzinfo=pytz.UTC) for valid_time in dataarray['valid_time'].values]
indices = [i for i, value in enumerate(times) if value in utc_times]
subset_dataarray = dataarray.isel(valid_time=indices).load()

dataarray_list.append(subset_dataarray)

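
The switch to a context manager plus .load() matters because xarray opens NetCDF files lazily. A minimal sketch of the pitfall it avoids, assuming a local download_0.nc produced by the loop above:

import xarray as xr

with xr.open_dataset("download_0.nc") as ds:
    # .load() reads the selected values into memory while the file is still open.
    subset = ds.isel(valid_time=[0, 1, 2]).load()

# Thanks to .load(), `subset` is safe to use here; without it, xarray would try to
# read from the now-closed file and raise an I/O error on first access.
print(subset)
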
(Diffs for the remaining changed files are not shown here.)
