Skip to content

Commit

Permalink
Merge pull request #53 from EcoExtreML/soil-temp-moist
Browse files Browse the repository at this point in the history
Add soil temperature and soil moisture to ERA5 data, STEMMUS_SCOPE recipe
  • Loading branch information
SarahAlidoost authored Aug 19, 2024
2 parents 8e0bf7d + 690694b commit 67d2ea9
Show file tree
Hide file tree
Showing 27 changed files with 242 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
run: |
python3 -m pip install --upgrade pip hatch
- name: Run fast tests first
run: hatch run test
run: hatch run fast-test
- name: Run full test suite & coverage
run: hatch run test
- name: Verify that we can build the package
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ format = [
"ruff check src/ tests/ --fix --exit-non-zero-on-fix",
"lint",
]
fast-test = ["pytest -m 'not slow'"]
fast-test = ["pytest -m \"not slow\""]
test = [
"pytest ./src/zampy/ ./tests/ --doctest-modules --doctest-ignore-import-errors",
]
Expand All @@ -131,10 +131,11 @@ markers = [
ignore_missing_imports = true
disallow_untyped_defs = true
python_version = "3.10"
exclude = "tests"

[tool.ruff]
line-length = 88
exclude = ["docs", "build"]
exclude = ["docs", "build", "tests"]
target-version = "py310"

[tool.ruff.lint]
Expand Down
41 changes: 41 additions & 0 deletions recipes/STEMMUS_SCOPE_input.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# config (folder, login info, etc.) goes in a ~/.zampy/config file
name: "STEMMUS_SCOPE_input"

download:
time: ["2020-01-01", "2020-06-30"]
bbox: [60, 10, 50, 0] # NESW
datasets:
era5_land:
variables:
- air_temperature
- dewpoint_temperature
- soil_temperature
- soil_moisture
era5:
variables:
- total_precipitation
- surface_thermal_radiation_downwards
- surface_solar_radiation_downwards
- surface_pressure
- eastward_component_of_wind
- northward_component_of_wind
eth_canopy_height:
variables:
- height_of_vegetation
fapar_lai:
variables:
- leaf_area_index
land_cover:
variables:
- land_cover
prism_dem_90:
variables:
- elevation
cams:
variables:
- co2_concentration

convert:
convention: ALMA
frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword.
resolution: 0.25 # output resolution in degrees.
1 change: 1 addition & 0 deletions src/zampy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""zampy."""

from zampy import datasets


Expand Down
8 changes: 6 additions & 2 deletions src/zampy/cli.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
"""Implements CLI interface for Zampy."""

from pathlib import Path
import click
import dask.distributed
from zampy.recipe import RecipeManager


@click.command()
@click.argument("recipe", type=click.Path(exists=True, path_type=Path))
def run_recipe(recipe: Path) -> None:
@click.option("--skip-download", is_flag=True)
def run_recipe(recipe: Path, skip_download: bool) -> None:
"""Run the recipe using the CLI."""
click.echo(f"Executing recipe: {recipe}")
rm = RecipeManager(recipe)
rm = RecipeManager(recipe, skip_download)
rm.run()


if __name__ == "__main__":
dask.distributed.Client()
run_recipe()
8 changes: 8 additions & 0 deletions src/zampy/conventions/ALMA.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,13 @@
"land_cover": {
"variable": "land_cover",
"units": ""
},
"soil_temperature": {
"variable": "SoilTemp",
"units": "kelvin"
},
"soil_moisture": {
"variable": "SoilMoist",
"units": "kilogram/meter**3"
}
}
1 change: 1 addition & 0 deletions src/zampy/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Datasets implementations."""

from zampy.datasets import dataset_protocol
from zampy.datasets import validation
from zampy.datasets.catalog import DATASETS
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/catalog.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Catalog of datasets."""

from zampy.datasets import dataset_protocol
from zampy.datasets.cams import CAMS
from zampy.datasets.era5 import ERA5
Expand Down
74 changes: 73 additions & 1 deletion src/zampy/datasets/cds_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""CDS utilities used by ECMWF datasets."""

from copy import copy
from pathlib import Path
import cdsapi
import numpy as np
Expand Down Expand Up @@ -44,6 +45,21 @@
"21:00", "22:00", "23:00",
] # fmt: skip

# Per-level CDS request names behind each combined zampy variable. When a
# combined name appears in a download request it is removed and replaced by
# every entry of its tuple.
_SOIL_TEMPERATURE_LEVELS = (
    "soil_temperature_level_1",
    "soil_temperature_level_2",
    "soil_temperature_level_3",
    "soil_temperature_level_4",
)
_SOIL_MOISTURE_LAYERS = (
    "volumetric_soil_water_layer_1",
    "volumetric_soil_water_layer_2",
    "volumetric_soil_water_layer_3",
    "volumetric_soil_water_layer_4",
)

SPLIT_VARIABLES = {
    "soil_temperature": _SOIL_TEMPERATURE_LEVELS,
    "soil_moisture": _SOIL_MOISTURE_LAYERS,
}


def cds_request(
dataset: str,
Expand Down Expand Up @@ -226,6 +242,12 @@ def retrieve_era5(
# create list of year/month pairs
year_month_pairs = time_bounds_to_year_month(time_bounds)

variables = copy(variables) # Prevent original input from being modified in-place
for split_var in SPLIT_VARIABLES:
if split_var in variables:
variables.remove(split_var)
variables.extend(SPLIT_VARIABLES[split_var])

for (year, month), variable in product(
year_month_pairs, variables, position=0, leave=True
):
Expand Down Expand Up @@ -354,7 +376,8 @@ def convert_to_zampy(
print(f"File '{ncfile.name}' already exists, skipping...")
else:
ds = parse_nc_file(file)

# Rename the vswl data:
ncfile = Path(str(ncfile).replace("volumetric_soil_water", "soil_moisture"))
ds.to_netcdf(path=ncfile)


Expand All @@ -373,6 +396,28 @@ def convert_to_zampy(
"co2": "co2_concentration",
}

# Short names of the per-layer soil variables, each keyed to the zampy variable
# its layer belongs to. Membership in this mapping is what marks a variable as
# multi-layer when a netCDF file is parsed.
VAR_REFERENCE_MULTI_LAYER = dict.fromkeys(
    ("stl1", "stl2", "stl3", "stl4"), "soil_temperature"
) | dict.fromkeys(("swvl1", "swvl2", "swvl3", "swvl4"), "soil_moisture")

# Depth interval [top, bottom] of every soil layer, keyed by the layer's short
# name. Each interval is wrapped in an outer list so it can be assigned directly
# as a ("depth", "nv") variable; its mean is used as the depth coordinate.
# NOTE(review): values look like centimetres (ERA5 soil layers) — confirm unit.
LAYER_BOUNDS = {
    layer: [[top, bottom]]
    for layer, top, bottom in (
        ("stl1", 0.0, 7.0),
        ("stl2", 7.0, 28.0),
        ("stl3", 28.0, 100.0),
        ("stl4", 100.0, 289.0),
        ("swvl1", 0.0, 7.0),
        ("swvl2", 7.0, 28.0),
        ("swvl3", 28.0, 100.0),
        ("swvl4", 100.0, 289.0),
    )
}

# Density of water; e.g. multiplied into the volumetric soil water variables
# (swvl*) so that they are expressed in kg m**-3.
WATER_DENSITY = 997.0 # kg/m3


Expand Down Expand Up @@ -416,6 +461,33 @@ def parse_nc_file(file: Path) -> xr.Dataset:
variable_name
].desc

if variable in VAR_REFERENCE_MULTI_LAYER:
if ( # Soil temperature/moisture routine
str(variable).startswith("stl") or str(variable).startswith("swvl")
):
if str(variable).startswith("swvl"):
varname = "soil_moisture"
standard_name = "moisture_content_of_soil_layer"
ds[variable] *= WATER_DENSITY
ds[variable].attrs.update({"units": "kg m**-3"})
else:
varname = "soil_temperature"
standard_name = "temperature_in_ground"

da = ds[variable]
name = str(da.name)
da = da.expand_dims({"depth": [np.mean(LAYER_BOUNDS[name])]})
da = da.rename(varname)
da.attrs.update(
{
"long_name": varname.replace("_", " "),
"standard_name": standard_name,
}
)

ds = da.to_dataset()
ds["depth_bounds"] = (("depth", "nv"), LAYER_BOUNDS[name])

# TODO: add dataset attributes.

return ds
1 change: 1 addition & 0 deletions src/zampy/datasets/converter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Dataset formatter for different conventions."""

import json
import warnings
from pathlib import Path
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/dataset_protocol.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Outline of the dataset protocol."""

import json
import shutil
from dataclasses import dataclass
Expand Down
17 changes: 16 additions & 1 deletion src/zampy/datasets/era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,30 @@ class ERA5Land(ECMWFDataset): # noqa: D101
raw_variables = [
Variable(name="t2m", unit=unit_registry.kelvin),
Variable(name="d2m", unit=unit_registry.kelvin),
Variable(name="st", unit=unit_registry.kelvin),
Variable(name="swvl", unit=unit_registry.fraction),
]

# variable names used in cdsapi downloading request
cds_var_names = {
"air_temperature": "2m_temperature",
"dewpoint_temperature": "2m_dewpoint_temperature",
"soil_temperature_level_1": "soil_temperature_level_1", # Note: split variables
"soil_temperature_level_2": "soil_temperature_level_2",
"soil_temperature_level_3": "soil_temperature_level_3",
"soil_temperature_level_4": "soil_temperature_level_4",
"volumetric_soil_water_layer_1": "volumetric_soil_water_layer_1",
"volumetric_soil_water_layer_2": "volumetric_soil_water_layer_2",
"volumetric_soil_water_layer_3": "volumetric_soil_water_layer_3",
"volumetric_soil_water_layer_4": "volumetric_soil_water_layer_4",
}

variable_names = list(cds_var_names.keys())
variable_names = [
"air_temperature",
"dewpoint_temperature",
"soil_temperature",
"soil_moisture",
]

variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names]

Expand Down
14 changes: 11 additions & 3 deletions src/zampy/datasets/eth_canopy_height.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""ETH canopy height dataset."""

import gzip
from pathlib import Path
import numpy as np
Expand Down Expand Up @@ -269,10 +270,17 @@ def parse_tiff_file(file: Path, sd_file: bool = False) -> xr.Dataset:
da = da.isel(band=0) # get rid of band dim
da = da.drop_vars(["band", "spatial_ref"]) # drop unnecessary coords
ds = da.to_dataset()
ds = ds.assign_coords( # halfway in the year
{"time": np.datetime64("2020-07-01").astype("datetime64[ns]")}
ds = xr.concat( # Cover entirety of 2020
(
ds.assign_coords(
{"time": np.datetime64("2020-01-01").astype("datetime64[ns]")}
),
ds.assign_coords(
{"time": np.datetime64("2021-01-01").astype("datetime64[ns]")}
),
),
dim="time",
)
ds = ds.expand_dims("time")
ds = ds.rename(
{
"band_data": "height_of_vegetation_standard_deviation"
Expand Down
8 changes: 7 additions & 1 deletion src/zampy/datasets/fapar_lai.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Implementation of the FAPAR LAI dataset."""

import os
import shutil
import tempfile
import zipfile
Expand Down Expand Up @@ -119,7 +120,11 @@ def ingest(

# netCDF files follow CF-1.6, only unpacking the archives is required.
for file in zip_files:
with tempfile.TemporaryDirectory(dir=tmp_path) as _tmpdir:
with tempfile.TemporaryDirectory(
dir=tmp_path,
# cleanup fails on windows. No clear idea on how to fix this.
ignore_cleanup_errors=True if os.name == "nt" else False,
) as _tmpdir:
tmpdir = Path(_tmpdir)

extract_fapar_zip(
Expand Down Expand Up @@ -257,6 +262,7 @@ def ingest_ncfile(ncfile: Path, ingest_folder: Path) -> None:
path=ingest_folder / ncfile.name,
encoding={"leaf_area_index": {"zlib": True, "complevel": 3}},
)
ds.close() # explicitly close to release file to system (for Windows)


def extract_fapar_zip(
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/prism_dem.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Prism DEM dataset."""

import gzip
import tarfile
from pathlib import Path
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Shared utilities from datasets."""

import urllib.request
from pathlib import Path
import requests
Expand Down
1 change: 1 addition & 0 deletions src/zampy/datasets/validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Checks for user input validation."""

from pathlib import Path
from zampy.datasets.dataset_protocol import Dataset
from zampy.datasets.dataset_protocol import SpatialBounds
Expand Down
Loading

0 comments on commit 67d2ea9

Please sign in to comment.