Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CIF-313 Air quality metrics #94

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions city_metrix/layers/cams.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import cdsapi
import os
import os, datetime
import xarray as xr
import zipfile

Expand All @@ -16,6 +16,7 @@ def get_data(self, bbox):
min_lon, min_lat, max_lon, max_lat = bbox

c = cdsapi.Client()
timestamp = str(datetime.datetime.utcnow().timestamp()).replace('.', '-')
c.retrieve(
'cams-global-reanalysis-eac4',
{
Expand All @@ -30,18 +31,13 @@ def get_data(self, bbox):
'area': [max_lat, min_lon, min_lat, max_lon],
'data_format': 'netcdf_zip',
},
'cams_download.zip')
f'cams_download_{timestamp}.zip')

# If data files from earlier runs not deleted, save new files with postpended numbers
existing_cams_downloads = [fname for fname in os.listdir('.') if fname.startswith('cams_download') and not fname.endswith('.zip')]
num_id = len(existing_cams_downloads)
while f'cams_download_{num_id}' in existing_cams_downloads:
num_id += 1
fname = f'cams_download{"" if num_id == 0 else f"_{num_id}"}'
fname = f'cams_download_{timestamp}'
os.makedirs(fname, exist_ok=True)

# extract the ZIP file
with zipfile.ZipFile('cams_download.zip', 'r') as zip_ref:
with zipfile.ZipFile(f'cams_download_{timestamp}.zip', 'r') as zip_ref:
# Extract all the contents of the ZIP file to the specified directory
zip_ref.extractall(fname)

Expand Down Expand Up @@ -74,14 +70,13 @@ def get_data(self, bbox):
# target unit is ug/m3
for var in ['co', 'no2', 'go3', 'so2']:
data[var].values = data[var].values * data['msl'].values / (287.058 * data['t2m'].values) * (10 ** 9)

# drop pressure and temperature
data = data.drop_vars(['msl', 't2m'])
# xarray.Dataset to xarray.DataArray
data = data.to_array()

# Remove local files
os.remove('cams_download.zip')
os.remove(f'cams_download_{timestamp}.zip')
# Workaround for elusive permission error
try:
for nc_file in os.listdir(fname):
Expand Down
2 changes: 2 additions & 0 deletions city_metrix/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
from .urban_open_space import urban_open_space
from .natural_areas import natural_areas
from .era_5_met_preprocessing import era_5_met_preprocessing
from .who_air_pollutant_exceedance_days import who_air_pollutant_exceedance_days
from .annual_dailyconcentration_statistic import annual_dailyconcentration_statistic
65 changes: 65 additions & 0 deletions city_metrix/metrics/annual_dailyconcentration_statistic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import geopandas as gpd
import numpy as np
from city_metrix.layers import Layer, Cams

SPECIES_INFO = {
'no2': {
'name': 'nitrogen dioxide',
'molar_mass': 46.0055,
'who_threshold': 25.0,
'eac4_varname': 'no2'
},
'so2': {
'name': 'sulfur dioxide',
'molar_mass': 64.066,
'who_threshold': 40.0,
'eac4_varname': 'so2'
},
'o3': { # Ozone thresholds are based on 8-hour average, not 24-hour.
'name': 'ozone',
'molar_mass': 48.0,
'who_threshold': 100.0,
'eac4_varname': 'go3'
},
'pm25': {
'name': 'fine particulate matter',
'who_threshold': 15.0,
'eac4_varname': 'pm2p5'
},
'pm2p5': {
'name': 'fine particulate matter',
'who_threshold': 15.0,
'eac4_varname': 'pm2p5'
},
'pm10': {
'name': 'coarse particulate matter',
'who_threshold': 45.0,
'eac4_varname': 'pm10'
},
'co': {
'name': 'carbon monoxide',
'molar_mass': 28.01,
'who_threshold': 4000.0,
'eac4_varname': 'co'
}
}

def annual_dailyconcentration_statistic(zones, species, statname):
# species is one of 'co', 'no2', 'o3', 'so2', 'pm2p5', 'pm10'
# statname is one of 'mean', 'max'
if not species in ['co', 'no2', 'o3', 'so2', 'pm2p5', 'pm10']:
raise Exception(f'Unsupported pollutant species {species}')
zones_copy = zones.copy(deep=True)
result = []
for rownum in range(len(zones)):
zone = zones.iloc[[rownum]]
cams_data = cams_layer.get_data(zone.total_bounds).sel(variable=SPECIES_INFO[species]['eac4_varname'])
cams_data_daily = cams_data.resample({'valid_time': '1D'}).mean().data
if statname == 'mean':
result.append(cams_data_daily.mean())
elif statname == 'max':
result.append(cams_data_daily.max())
else:
raise Exception(f'Unsupported stat type {statname}')
zones_copy[f'{statname}_daily_{species}'] = result
return zones_copy
89 changes: 89 additions & 0 deletions city_metrix/metrics/who_air_pollutant_exceedance_days.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import geopandas as gpd
import numpy as np
from city_metrix.layers import Layer, Cams

SPECIES_INFO = {
'no2': {
'name': 'nitrogen dioxide',
'molar_mass': 46.0055,
'who_threshold': 25.0,
'eac4_varname': 'no2'
},
'so2': {
'name': 'sulfur dioxide',
'molar_mass': 64.066,
'who_threshold': 40.0,
'eac4_varname': 'so2'
},
'o3': { # Ozone thresholds are based on 8-hour average, not 24-hour.
'name': 'ozone',
'molar_mass': 48.0,
'who_threshold': 100.0,
'eac4_varname': 'go3'
},
'pm25': {
'name': 'fine particulate matter',
'who_threshold': 15.0,
'eac4_varname': 'pm2p5'
},
'pm2p5': {
'name': 'fine particulate matter',
'who_threshold': 15.0,
'eac4_varname': 'pm2p5'
},
'pm10': {
'name': 'coarse particulate matter',
'who_threshold': 45.0,
'eac4_varname': 'pm10'
},
'co': {
'name': 'carbon monoxide',
'molar_mass': 28.01,
'who_threshold': 4000.0,
'eac4_varname': 'co'
}
}

def max_n_hr(arr, n):
# Returns highest mean concentration over an n-hr period for a given array
resampled_1hr = arr.resample({'valid_time': '1H'}).interpolate('linear')
max_by_offset = None
for offs in range(n):
resampled_n_hr = resampled_1hr.resample({'valid_time': f'{n}H'}, offset=offs).mean().data
candidate = resampled_n_hr.max()
if max_by_offset is None:
max_by_offset = candidate
else:
max_by_offset = max(max_by_offset, candidate)
return max_by_offset

def who_air_pollutant_exceedance_days(zones, species=None):
# species is list including these elements: 'co', 'no2', 'o3', 'so2', 'pm2p5', 'pm10'
# returns GeoSeries with column with number of days any species exceeds WHO guideline
if species is not None:
if not isinstance(species, (list, tuple, set)) or len(species) == 0 or sum([not i in ['co', 'no2', 'o3', 'so2', 'pm2p5', 'pm10'] for i in species]) > 0:
raise Except('Argument species must be list-like containing any non-empty subset of \'co\', \'no2\', \'o3\', \'so2\', \'pm2p5\', \'pm10\')
zones_copy = zones.copy(deep=True)
result = []
for rownum in range(len(zones)):
zone = zones.iloc[[rownum]]
cams_data = cams_layer.get_data(zone.total_bounds)
if species is None:
species = ['co', 'no2', 'o3', 'so2', 'pm2p5', 'pm10']
excdays = []
for s in species:
if s == 'o3':
n = 8
ds = d.sel(variable=SPECIES_INFO['o3']['eac4_varname'])
day_data = [ds[i * n: (i + 1)* n] for i in range(365)]
maxconc_by_day = [max_n_hr(arr, n) for arr in day_data]
excdays.append([conc > SPECIES_INFO['o3']['who_threshold'] for conc in maxconc_by_day])
else:
ds = d.sel(variable=SPECIES_INFO[s]['eac4_varname'])
maxconc_by_day = ds.resample({'valid_time': '1D'}).mean().data
excdays.append([conc > SPECIES_INFO[s]['who_threshold'] for conc in maxconc_by_day])
excdays_np = np.vstack(excdays)
result.append(excdays_np.any(axis=0).sum())
zones_copy['exceedancedays_{0}'.format('-'.join(species))] = result
return zones_copy

11 changes: 10 additions & 1 deletion tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
import pytest


def test_annual_dailyconcentration_statistic():
indicator_mean = test_annual_dailyconcentration_statistic(ZONES, 'mean')
indicator_max = test_annual_dailyconcentration_statistic(ZONES, 'max')
assert indicator_mean
assert indicator_max

def test_built_land_with_high_lst():
indicator = built_land_with_high_land_surface_temperature(ZONES)
expected_zone_size = ZONES.geometry.size
Expand Down Expand Up @@ -43,9 +49,12 @@ def test_natural_areas():
actual_indicator_size = indicator.size
assert expected_zone_size == actual_indicator_size


def test_urban_open_space():
indicator = urban_open_space(ZONES)
expected_zone_size = ZONES.geometry.size
actual_indicator_size = indicator.size
assert expected_zone_size == actual_indicator_size

def test_who_air_pollutant_exceedance_days()
indicator = who_air_pollutant_exceedance_days(ZONES, ['so2'])
assert indicator <= 365
Loading