Skip to content

Commit

Permalink
Merge pull request #49 from wri/feature/era5
Browse files Browse the repository at this point in the history
add ERA 5 highest temperature layer
  • Loading branch information
chrowe authored Oct 11, 2024
2 parents ecede70 + 34a549f commit 3540322
Show file tree
Hide file tree
Showing 12 changed files with 714 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dev_ci_cd_conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ jobs:
GOOGLE_APPLICATION_USER: ${{ secrets.GOOGLE_APPLICATION_USER }}
GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
run: |
pytest tests
pytest tests
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ To run the module,

1. You need access to Google Earth Engine
2. Install <https://cloud.google.com/sdk/docs/install>
3. If you want to use the ERA5 layer, you need to install the [Climate Data Store (CDS) Application Program Interface (API)](https://cds.climate.copernicus.eu/api-how-to)

### Interactive development

Expand Down
1 change: 1 addition & 0 deletions city_metrix/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@
from .alos_dsm import AlosDSM
from .overture_buildings import OvertureBuildings
from .nasa_dem import NasaDEM
from .era_5_hottest_day import Era5HottestDay
from .impervious_surface import ImperviousSurface
115 changes: 115 additions & 0 deletions city_metrix/layers/era_5_hottest_day.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import ee
from timezonefinder import TimezoneFinder
from pytz import timezone
from datetime import datetime
import pytz
import cdsapi
import os
import xarray as xr

from .layer import Layer

class Era5HottestDay(Layer):
def __init__(self, start_date="2023-01-01", end_date="2024-01-01", **kwargs):
super().__init__(**kwargs)
self.start_date = start_date
self.end_date = end_date

def get_data(self, bbox):
dataset = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")

# Function to find the city mean temperature of each hour
def hourly_mean_temperature(image):
hourly_mean = image.select('temperature_2m').reduceRegion(
reducer=ee.Reducer.mean(),
geometry=ee.Geometry.BBox(*bbox),
scale=11132,
bestEffort=True
).values().get(0)

return image.set('hourly_mean_temperature', hourly_mean)

era5 = ee.ImageCollection(dataset
.filterBounds(ee.Geometry.BBox(*bbox))
.filterDate(self.start_date, self.end_date)
.select('temperature_2m')
)

era5_hourly_mean = era5.map(hourly_mean_temperature)

# Sort the collection based on the highest temperature and get the first image
highest_temperature_day = era5_hourly_mean.sort('hourly_mean_temperature', False).first()
highest_temperature_day = highest_temperature_day.get('system:index').getInfo()

# system:index in format 20230101T00
year = highest_temperature_day[0:4]
month = highest_temperature_day[4:6]
day = highest_temperature_day[6:8]
time = highest_temperature_day[-2:]

min_lon, min_lat, max_lon, max_lat = bbox
center_lon = (min_lon + max_lon) / 2
center_lat = (min_lat + max_lat) / 2

# Initialize TimezoneFinder
tf = TimezoneFinder()
# Find the timezone of the center point
tz_name = tf.timezone_at(lng=center_lon, lat=center_lat)
# Get the timezone object
local_tz = timezone(tz_name)
# Define the UTC time
utc_time = datetime.strptime(f'{year}-{month}-{day} {time}:00:00', "%Y-%m-%d %H:%M:%S")

# Convert UTC time to local time
local_time = utc_time.replace(tzinfo=pytz.utc).astimezone(local_tz)
local_date = local_time.date()

utc_times = []
for i in range(0, 24):
local_time_hourly = local_tz.localize(datetime(local_date.year, local_date.month, local_date.day, i, 0))
utc_time_hourly = local_time_hourly.astimezone(pytz.utc)
utc_times.append(utc_time_hourly)

utc_dates = list(set([dt.date() for dt in utc_times]))

dataarray_list = []
c = cdsapi.Client()
for i in range(len(utc_dates)):
c.retrieve(
'reanalysis-era5-single-levels',
{
'product_type': 'reanalysis',
'variable': [
'10m_u_component_of_wind', '10m_v_component_of_wind', '2m_dewpoint_temperature',
'2m_temperature', 'clear_sky_direct_solar_radiation_at_surface', 'mean_surface_direct_short_wave_radiation_flux_clear_sky',
'mean_surface_downward_long_wave_radiation_flux_clear_sky', 'sea_surface_temperature', 'total_precipitation',
],
'year': utc_dates[i].year,
'month': utc_dates[i].month,
'day': utc_dates[i].day,
'time': ['00:00', '01:00', '02:00', '03:00', '04:00', '05:00', '06:00', '07:00', '08:00', '09:00',
'10:00', '11:00', '12:00', '13:00', '14:00', '15:00', '16:00', '17:00', '18:00', '19:00',
'20:00', '21:00', '22:00', '23:00'],
'area': [max_lat, min_lon, min_lat, max_lon],
'data_format': 'netcdf',
'download_format': 'unarchived'
},
f'download_{i}.nc')

dataarray = xr.open_dataset(f'download_{i}.nc')

# Subset times for the day
times = [valid_time.astype('datetime64[s]').astype(datetime).replace(tzinfo=pytz.UTC) for valid_time in dataarray['valid_time'].values]
indices = [i for i, value in enumerate(times) if value in utc_times]
subset_dataarray = dataarray.isel(valid_time=indices)

dataarray_list.append(subset_dataarray)

# Remove local file
os.remove(f'download_{i}.nc')

data = xr.concat(dataarray_list, dim='valid_time')
# xarray.Dataset to xarray.DataArray
data = data.to_array()

return data
3 changes: 2 additions & 1 deletion city_metrix/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
from .built_land_with_high_land_surface_temperature import built_land_with_high_land_surface_temperature
from .mean_tree_cover import mean_tree_cover
from .urban_open_space import urban_open_space
from .natural_areas import natural_areas
from .natural_areas import natural_areas
from .era_5_met_preprocessing import era_5_met_preprocessing
68 changes: 68 additions & 0 deletions city_metrix/metrics/era_5_met_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from datetime import datetime
import pandas as pd
import numpy as np
from geopandas import GeoDataFrame, GeoSeries

from city_metrix.layers import Era5HottestDay


def era_5_met_preprocessing(zones: GeoDataFrame) -> GeoSeries:
"""
Get ERA 5 data for the hottest day
:param zones: GeoDataFrame with geometries to collect zonal stats on
:return: Pandas Dataframe of data
"""
era_5_data = Era5HottestDay().get_data(zones.total_bounds)

t2m_var = era_5_data.sel(variable='t2m').values
u10_var = era_5_data.sel(variable='u10').values
v10_var = era_5_data.sel(variable='v10').values
sst_var = era_5_data.sel(variable='sst').values
cdir_var = era_5_data.sel(variable='cdir').values
sw_var = era_5_data.sel(variable='msdrswrfcs').values
lw_var = era_5_data.sel(variable='msdwlwrfcs').values
d2m_var = era_5_data.sel(variable='d2m').values
time_var = era_5_data['valid_time'].values
lat_var = era_5_data['latitude'].values
lon_var = era_5_data['longitude'].values

# temps go from K to C; global rad (cdir) goes from /hour to /second; wind speed from vectors (pythagorean)
# rh calculated from temp and dew point; vpd calculated from tepm and rh
times = [time.astype('datetime64[s]').astype(datetime) for time in time_var]
t2m_vals = (t2m_var[:]-273.15)
d2m_vals = (d2m_var[:]-273.15)
rh_vals = (100*(np.exp((17.625*d2m_vals)/(243.04+d2m_vals))/np.exp((17.625*t2m_vals)/(243.04+t2m_vals))))
grad_vals = (cdir_var[:]/3600)
dir_vals = (sw_var[:])
dif_vals = (lw_var[:])
wtemp_vals = (sst_var[:]-273.15)
wind_vals = (np.sqrt(((np.square(u10_var[:]))+(np.square(v10_var[:])))))
# calc vapor pressure deficit in hPa for future utci conversion. first, get svp in pascals and then get vpd
svp_vals = (0.61078*np.exp(t2m_vals/(t2m_vals+237.3)*17.2694))
vpd_vals = ((svp_vals*(1-(rh_vals/100))))*10

# make lat/lon grid
latitudes = lat_var[:]
longitudes = lon_var[:]
latitudes_2d, longitudes_2d = np.meshgrid(latitudes, longitudes, indexing='ij')
latitudes_flat = latitudes_2d.flatten()
longitudes_flat = longitudes_2d.flatten()

# create pandas dataframe
df = pd.DataFrame({
'time': np.repeat(times, len(latitudes_flat)),
'lat': np.tile(latitudes_flat, len(times)),
'lon': np.tile(longitudes_flat, len(times)),
'temp': t2m_vals.flatten(),
'rh': rh_vals.flatten(),
'global_rad': grad_vals.flatten(),
'direct_rad': dir_vals.flatten(),
'diffuse_rad': dif_vals.flatten(),
'water_temp': wtemp_vals.flatten(),
'wind': wind_vals.flatten(),
'vpd': vpd_vals.flatten()
})
# round all numbers to two decimal places, which is the precision needed by the model
df = df.round(2)

return df
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ dependencies:
- geemap=0.32.0
- pip=23.3.1
- boto3=1.34.124
- scikit-learn=1.5.1
- cdsapi=0.7.3
- timezonefinder=6.5.2
- scikit-image=0.24.0
- exactextract=0.2.0
- pip:
Expand Down
124 changes: 124 additions & 0 deletions notebooks/layers/era5.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.dont_write_bytecode=True\n",
"\n",
"%load_ext autoreload\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import geopandas as gpd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # update the wd path to be able to laod the module\n",
"os.chdir('../..')\n",
"os.getcwd()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Get Area of Interest"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load boundary from s3\n",
"boundary_path = 'https://cities-indicators.s3.eu-west-3.amazonaws.com/data/boundaries/boundary-BRA-Salvador-ADM4union.geojson'\n",
"city_gdf = gpd.read_file(boundary_path, driver='GeoJSON')\n",
"city_gdf.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get area in sqare km\n",
"city_gdf.to_crs(epsg=3857).area / 10**6"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get Layer"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%autoreload\n",
"from city_metrix.layers import Era5HottestDay\n",
"\n",
"hottest_day = Era5HottestDay().get_data(city_gdf.total_bounds)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hottest_day"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "cities-cif",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 3540322

Please sign in to comment.