Skip to content

Commit

Permalink
chore: Add test for checking physical limits and zeroes in NWP data o…
Browse files Browse the repository at this point in the history
  • Loading branch information
glitch401 committed Jul 3, 2024
1 parent 0010180 commit 3ee287c
Show file tree
Hide file tree
Showing 59 changed files with 505 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ coverage.xml
.pytest_cache/
test.nc

# Test data generator
tests/load/nwp/test_data_generator.py

# Translations
*.mo
*.pot
Expand Down
100 changes: 99 additions & 1 deletion ocf_datapipes/load/nwp/nwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,85 @@ def __init__(
self,
zarr_path: Union[Path, str, list[Path], list[str]],
provider: str = "ukv",
check_for_zeros: bool = False,
check_physical_limits: bool = False,
):
"""
Opens NWP Zarr and yields it
Args:
zarr_path: Path to the Zarr file
provider: NWP provider
check_for_zeros: Check for zeros in the NWP data
check_physical_limits: Check the physical limits of nwp data (e.g. -100<temperature<100)
"""
self.zarr_path = zarr_path
self.check_for_zeros = check_for_zeros
self.check_physical_limits = check_physical_limits
self.limits = {
"temperature": (-100, 60), # Celsius
"specific_humidity": (0, 0.03), # kg/kg
"relative_humidity": (0, 100), # Percentage
"pressure": (0, 1100), # hPa (sea level pressure)
"u_wind": (-200, 200), # m/s
"v_wind": (-200, 200), # m/s
"geopotential": (0, 100000), # m^2/s^2
"total_precipitation": (0, 2000), # mm/day
"convective_precipitation": (0, 1000), # mm/day
"snowfall": (0, 1000), # mm water equivalent/day
"graupel": (0, 500), # mm water equivalent/day
"cloud_cover": (0, 100), # Percentage
"surface_temperature": (-90, 60), # Celsius
"sea_surface_temperature": (-2, 35), # Celsius
"soil_temperature": (-50, 60), # Celsius
"soil_moisture": (0, 1), # m^3/m^3
"visibility": (0, 100000), # meters
"wind_gust": (0, 250), # m/s
"solar_radiation": (0, 1500), # W/m^2
"longwave_radiation": (0, 750), # W/m^2
"evaporation": (0, 50), # mm/day
"potential_evaporation": (0, 100), # mm/day
"boundary_layer_height": (0, 5000), # meters
"cape": (0, 10000), # J/kg
"cin": (0, 1000), # J/kg
"lifted_index": (-15, 15), # Kelvin
"total_column_water": (0, 100), # kg/m^2
"ozone_concentration": (0, 1000), # Dobson units
"dew_point_temperature": (-100, 35), # Celsius
"wet_bulb_temperature": (-100, 35), # Celsius
"potential_temperature": (0, 1000), # Kelvin
"equivalent_potential_temperature": (0, 1000), # Kelvin
"vorticity": (-1e-3, 1e-3), # 1/s
"divergence": (-1e-3, 1e-3), # 1/s
"vertical_velocity": (-50, 50), # m/s
"cloud_base_height": (0, 20000), # meters
"cloud_top_height": (0, 20000), # meters
"cloud_water_content": (0, 5), # g/kg
"ice_water_content": (0, 5), # g/kg
"surface_roughness": (0, 10), # meters
"albedo": (0, 1), # dimensionless
"friction_velocity": (0, 5), # m/s
"sensible_heat_flux": (-500, 500), # W/m^2
"latent_heat_flux": (-500, 500), # W/m^2
"momentum_flux": (-10, 10), # N/m^2
"surface_pressure": (300, 1100), # hPa
"mean_sea_level_pressure": (870, 1090), # hPa
"tropopause_pressure": (50, 500), # hPa
"tropopause_temperature": (-100, 0), # Celsius
"precipitable_water": (0, 100), # mm
"total_cloud_cover": (0, 100), # Percentage
"low_cloud_cover": (0, 100), # Percentage
"medium_cloud_cover": (0, 100), # Percentage
"high_cloud_cover": (0, 100), # Percentage
"convective_available_potential_energy": (0, 10000), # J/kg
"convective_inhibition": (0, 1000), # J/kg
"storm_relative_helicity": (-1000, 1000), # m^2/s^2
"bulk_richardson_number": (-10, 10), # dimensionless
"lifted_condensation_level": (0, 5000), # meters
"level_of_free_convection": (0, 20000), # meters
"equilibrium_level": (0, 20000), # meters
"UKV": (250, 330), # UKV specific
}
logger.info(f"Using {provider.lower()}")
if provider.lower() == "ukv":
self.open_nwp = open_ukv
Expand All @@ -53,9 +123,37 @@ def __init__(
else:
raise ValueError(f"Unknown provider: {provider}")

def __iter__(self) -> Union[xr.DataArray, xr.Dataset]:
def __iter__(self) -> Union[xr.DataArray, xr.Dataset]:  # type: ignore
    """Open the NWP data once, optionally validate it, then yield it endlessly.

    The zarr store at ``self.zarr_path`` is opened with the provider-specific
    ``self.open_nwp`` callable. If ``self.check_for_zeros`` and/or
    ``self.check_physical_limits`` are set, the matching check runs a single
    time before the first item is yielded.

    Yields:
        The same opened NWP object on every iteration.
    """
    logger.debug("Opening NWP data: %s", self.zarr_path)
    opened = self.open_nwp(self.zarr_path)

    # Validation is opt-in and happens before anything is handed to consumers.
    if self.check_for_zeros:
        self.check_if_zeros(opened)
    if self.check_physical_limits:
        self.check_if_physical_limits(opened)

    # Infinite stream: downstream code decides when to stop pulling.
    while True:
        yield opened

def check_if_zeros(self, nwp: Union[xr.DataArray, xr.Dataset]):
    """Raise if any value in the NWP data is exactly zero.

    Args:
        nwp: The opened NWP data. A DataArray is checked as a whole; for a
            Dataset every variable obtained by iterating it is checked in turn.

    Raises:
        ValueError: If a zero is found. For a Dataset the message names the
            first offending variable.
    """
    if isinstance(nwp, xr.DataArray):
        if (nwp.values == 0).any():
            raise ValueError("NWP DataArray contains zeros")
    elif isinstance(nwp, xr.Dataset):
        for var in nwp:
            if (nwp[var].values == 0).any():
                # Space after "variable" so the name is readable in the message.
                raise ValueError(f"NWP Dataset variable {var} contains zeros")

def check_if_physical_limits(self, nwp: Union[xr.DataArray, xr.Dataset]):
    """Raise if a known variable has values outside its configured limits.

    Only names that are keys of ``self.limits`` are checked; anything else is
    silently skipped. A DataArray is looked up by its ``name``; for a Dataset
    every entry of ``self.limits`` present in ``nwp.variables`` is checked.

    Both input types use the same comparison (``< lower`` or ``> upper``), so
    NaN values, which compare false against either bound, never trigger the
    error on their own.

    Args:
        nwp: The opened NWP data.

    Raises:
        ValueError: If any value of a checked variable is below the lower or
            above the upper limit.
    """
    if isinstance(nwp, xr.DataArray):
        to_check = [(nwp.name, nwp)] if nwp.name in self.limits else []
    elif isinstance(nwp, xr.Dataset):
        to_check = [
            (name, nwp[name]) for name in self.limits if name in nwp.variables
        ]
    else:
        to_check = []

    # One shared rule for both input types keeps their behaviour identical.
    for var_name, data in to_check:
        lower, upper = self.limits[var_name]
        if (data < lower).any() or (data > upper).any():
            raise ValueError(f"NWP data {var_name} is outside physical limits")
1 change: 1 addition & 0 deletions tests/data/nwp_data/test_with_zeros_n_limits.zarr/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
3 changes: 3 additions & 0 deletions tests/data/nwp_data/test_with_zeros_n_limits.zarr/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
188 changes: 188 additions & 0 deletions tests/data/nwp_data/test_with_zeros_n_limits.zarr/.zmetadata
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
{
"metadata": {
".zattrs": {},
".zgroup": {
"zarr_format": 2
},
"UKV/.zarray": {
"chunks": [
1,
1,
10,
352,
274
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f2",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
1,
9,
10,
704,
548
],
"zarr_format": 2
},
"UKV/.zattrs": {
"Conventions": "CF-1.7",
"GRIB_centre": "egrr",
"GRIB_centreDescription": "U.K. Met Office - Exeter",
"GRIB_edition": 2,
"GRIB_subCentre": 0,
"_ARRAY_DIMENSIONS": [
"variable",
"init_time",
"step",
"y",
"x"
],
"institution": "U.K. Met Office - Exeter"
},
"init_time/.zarray": {
"chunks": [
9
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i8",
"fill_value": null,
"filters": null,
"order": "C",
"shape": [
9
],
"zarr_format": 2
},
"init_time/.zattrs": {
"_ARRAY_DIMENSIONS": [
"init_time"
],
"calendar": "proleptic_gregorian",
"long_name": "initial time of forecast",
"standard_name": "forecast_reference_time",
"units": "hours since 2020-04-01 00:00:00"
},
"step/.zarray": {
"chunks": [
10
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i8",
"fill_value": null,
"filters": null,
"order": "C",
"shape": [
10
],
"zarr_format": 2
},
"step/.zattrs": {
"_ARRAY_DIMENSIONS": [
"step"
],
"long_name": "time since forecast_reference_time",
"standard_name": "forecast_period",
"units": "hours"
},
"variable/.zarray": {
"chunks": [
1
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "|O",
"fill_value": null,
"filters": [
{
"id": "vlen-utf8"
}
],
"order": "C",
"shape": [
1
],
"zarr_format": 2
},
"variable/.zattrs": {
"_ARRAY_DIMENSIONS": [
"variable"
]
},
"x/.zarray": {
"chunks": [
548
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i4",
"fill_value": null,
"filters": null,
"order": "C",
"shape": [
548
],
"zarr_format": 2
},
"x/.zattrs": {
"_ARRAY_DIMENSIONS": [
"x"
]
},
"y/.zarray": {
"chunks": [
704
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<i4",
"fill_value": null,
"filters": null,
"order": "C",
"shape": [
704
],
"zarr_format": 2
},
"y/.zattrs": {
"_ARRAY_DIMENSIONS": [
"y"
]
}
},
"zarr_consolidated_format": 1
}
28 changes: 28 additions & 0 deletions tests/data/nwp_data/test_with_zeros_n_limits.zarr/UKV/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"chunks": [
1,
1,
10,
352,
274
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f2",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
1,
9,
10,
704,
548
],
"zarr_format": 2
}
15 changes: 15 additions & 0 deletions tests/data/nwp_data/test_with_zeros_n_limits.zarr/UKV/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Conventions": "CF-1.7",
"GRIB_centre": "egrr",
"GRIB_centreDescription": "U.K. Met Office - Exeter",
"GRIB_edition": 2,
"GRIB_subCentre": 0,
"_ARRAY_DIMENSIONS": [
"variable",
"init_time",
"step",
"y",
"x"
],
"institution": "U.K. Met Office - Exeter"
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 3ee287c

Please sign in to comment.