diff --git a/docs/configuration.md b/docs/configuration.md index 5455114..2b1299c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -23,15 +23,25 @@ The configuration file should contain the `working_directory`, for instance: working_directory: /path_to_a_working_directory/ #for example: /home/bart/Zampy ``` -If you need access to data on CDS or ADS server, you should add your CDS or ADS credentials to `zampy_config.yml`: +The old Climate Data Store (CDS) is shut down on 3 September 2024. For more +information see: +[the-new-climate-data-store-beta](https://forum.ecmwf.int/t/the-new-climate-data-store-beta-cds-beta-is-now-live/3315). +To use the new CDS/ADS, you need to have an ECMWF account; your existing CDS/ADS +credentials do not work. + +If you need access to data on CDS or ADS server, you should add your CDS/ADS +credentials to `zampy_config.yml`. To find your key, go to [CDS how to +api](https://cds.climate.copernicus.eu/how-to-api), or [ADS how to +api](https://ads.atmosphere.copernicus.eu/how-to-api). You can skip the steps +related to `.cdsapirc` and simply add the key to `zampy_config.yml`: ```yaml cdsapi: - url: # for example https://cds.climate.copernicus.eu/api/v2 - key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 + url: # for example https://cds.climate.copernicus.eu/api + key: # for example xhashd-232jcsha-dsaj429-cdjajd29319 adsapi: - url: # for example https://ads.atmosphere.copernicus.eu/api/v2 - key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 + url: # for example https://ads.atmosphere.copernicus.eu/api + key: # for example xhashd-232jcsha-dsaj429-cdjajd29319 ``` ## Instructions for CDS/ADS datasets @@ -45,9 +55,6 @@ To download the following datasets, users need access to CDS/ADS via `cdsapi`/`a - ADS - CAMS EGG4 (e.g. 
co2) -To generate these API keys, you need to be a registered user on *CDS* via the [registration page](https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome), or on *ADS* via the [registration page](https://ads.atmosphere.copernicus.eu/user/register?destination=%2F%23!%2Fhome). - -Before submitting any request with `zampy`, please put your `cdsapi`/`adsapi` credentials in `zampy_config.yml`. Here is a short [instruction](https://cds.climate.copernicus.eu/api-how-to) about how to find your CDS/ADS API key. You can skip the steps related to `.cdsapirc` and simply add the key to `zampy_config.yml`. ### Agree to the Terms of Use on CDS/ADS diff --git a/docs/index.md b/docs/index.md index ec3a971..f1837c4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -52,7 +52,7 @@ download: cams: variables: - co2_concentration - + convert: convention: ALMA frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword. @@ -67,6 +67,16 @@ When you have your reciped created and saved on your disk, you can execute your zampy /path_to_recipe/sample_recipe.yml ``` +!!! note + + You may receive an error message from CDS/ADS if not all the required + licences have been accepted. Follow the instructions in the error message to + accept the licences and run the recipe again. + +When the download process starts, you can also check the status of your requests +in your CDS/ADS profile. + + ### Interact with `zampy` in notebooks It is possible to use `zampy` directly in Python via its Python API. This is not recommended, as it is more difficult to reproduce the workflow if there is no recipe. diff --git a/pyproject.toml b/pyproject.toml index 468e08e..ac61248 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ dependencies = [ "pint", "cf_xarray", # required to auto-pint CF compliant datasets. 
"pint-xarray", - "cdsapi", + "cdsapi>=0.7.2", "xarray-regrid", # for regridding ] dynamic = ["version"] diff --git a/recipes/STEMMUS_SCOPE_input.yml b/recipes/STEMMUS_SCOPE_input.yml index f296ac0..f98bd90 100644 --- a/recipes/STEMMUS_SCOPE_input.yml +++ b/recipes/STEMMUS_SCOPE_input.yml @@ -2,7 +2,7 @@ name: "STEMMUS_SCOPE_input" download: - time: ["2020-01-01", "2020-06-30"] + time: ["2020-01-01", "2020-02-15"] bbox: [60, 10, 50, 0] # NESW datasets: era5_land: @@ -37,5 +37,5 @@ download: convert: convention: ALMA - frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword. + frequency: 1h # outputs at 1 hour frequency. Pandas-like freq-keyword. resolution: 0.25 # output resolution in degrees. diff --git a/src/zampy/datasets/cds_utils.py b/src/zampy/datasets/cds_utils.py index c223a9a..4966344 100644 --- a/src/zampy/datasets/cds_utils.py +++ b/src/zampy/datasets/cds_utils.py @@ -35,10 +35,10 @@ "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", - "31", + "31", ] # fmt: skip -ALL_HOURS = [ +ALL_HOURS = [ "00:00", "01:00", "02:00", "03:00", "04:00", "05:00", "06:00", "07:00", "08:00", "09:00", "10:00", "11:00", "12:00", "13:00", "14:00", "15:00", "16:00", "17:00", "18:00", "19:00", "20:00", @@ -97,11 +97,13 @@ def cds_request( url, api_key = cds_api_key(fname) + # TODO: expose timeout, see issue 64 c = cdsapi.Client( url=url, key=api_key, verify=True, quiet=True, + timeout=300, ) # choose retrieve function retrieve_func = RETRIEVE_FUNCTION[fname] @@ -124,7 +126,8 @@ def cds_request_land_cover( dataset: str, time_bounds: TimeBounds, path: Path, - overwrite: bool, + spatial_bounds: SpatialBounds | None = None, + overwrite: bool = False, ) -> None: """Download land cover data via CDS API. @@ -136,6 +139,7 @@ def cds_request_land_cover( dataset: Dataset name for retrieval via `cdsapi`. time_bounds: Zampy time bounds object. 
path: File path to which the data should be saved. + spatial_bounds: Zampy spatial bounds object. overwrite: If an existing file (of the same size!) should be overwritten. """ fname = PRODUCT_FNAME[dataset] @@ -152,11 +156,19 @@ def cds_request_land_cover( years_months = time_bounds_to_year_month(time_bounds) years = {year for (year, _) in years_months} + if spatial_bounds is not None: + area = [ + spatial_bounds.north, + spatial_bounds.west, + spatial_bounds.south, + spatial_bounds.east, + ] + for year in tqdm(years): if int(year) < 2016: - version = "v2.0.7cds" + version = "v2_0_7cds" else: - version = "v2.1.1" + version = "v2_1_1" r = c.retrieve( dataset, { @@ -164,6 +176,7 @@ def cds_request_land_cover( "format": "zip", "year": year, "version": version, + "area": area, }, ) fpath = path / f"{fname}_LCCS_MAP_300m_{year}.zip" @@ -348,7 +361,7 @@ def _check_and_download( def time_bounds_to_year_month(time_bounds: TimeBounds) -> list[tuple[str, str]]: """Return year/month pairs.""" - date_range = pd.date_range(start=time_bounds.start, end=time_bounds.end, freq="M") + date_range = pd.date_range(start=time_bounds.start, end=time_bounds.end, freq="ME") year_month_pairs = [(str(date.year), str(date.month)) for date in date_range] return year_month_pairs diff --git a/src/zampy/datasets/ecmwf_dataset.py b/src/zampy/datasets/ecmwf_dataset.py index 87f39bc..0ffd19c 100644 --- a/src/zampy/datasets/ecmwf_dataset.py +++ b/src/zampy/datasets/ecmwf_dataset.py @@ -120,11 +120,19 @@ def load( files += (ingest_dir / self.name).glob(f"{self.name}_{var}*.nc") ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200}) - ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) + # rename valid_time to time + if "valid_time" in ds.dims: + ds = ds.rename({"valid_time": "time"}) + + ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) grid = xarray_regrid.create_regridding_dataset( make_grid(spatial_bounds, resolution) ) + + # this is needed before regrid + ds = 
ds.unify_chunks() + ds = ds.regrid.linear(grid) return ds diff --git a/src/zampy/datasets/eth_canopy_height.py b/src/zampy/datasets/eth_canopy_height.py index 48953a2..ede1a7e 100644 --- a/src/zampy/datasets/eth_canopy_height.py +++ b/src/zampy/datasets/eth_canopy_height.py @@ -243,7 +243,8 @@ def convert_tiff_to_netcdf( ds = parse_tiff_file(file, sd_file) # Coarsen the data to be 1/100 deg resolution instead of 1/12000 - ds = ds.coarsen({"latitude": 120, "longitude": 120}).mean() # type: ignore + if len(ds.latitude) >= 120 and len(ds.longitude) >= 120: + ds = ds.coarsen({"latitude": 120, "longitude": 120}).mean() # type: ignore ds = ds.compute() ds = ds.interpolate_na(dim="longitude", limit=1) ds = ds.interpolate_na(dim="latitude", limit=1) diff --git a/src/zampy/datasets/fapar_lai.py b/src/zampy/datasets/fapar_lai.py index fdfef0c..f33df96 100644 --- a/src/zampy/datasets/fapar_lai.py +++ b/src/zampy/datasets/fapar_lai.py @@ -150,8 +150,7 @@ def load( variable_names: list[str], ) -> xr.Dataset: files = list((ingest_dir / self.name).glob("*.nc")) - - ds = xr.open_mfdataset(files, parallel=True) + ds = xr.open_mfdataset(files) # see issue 65 ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) grid = xarray_regrid.create_regridding_dataset( @@ -223,7 +222,7 @@ def download_fapar_lai( "format": "zip", "variable": "lai", "horizontal_resolution": "1km", - "product_version": "V3", + "product_version": "v3", "satellite": "spot" if year < 2014 else "proba", "sensor": "vgt", "month": f"{month:0>2}", diff --git a/src/zampy/datasets/land_cover.py b/src/zampy/datasets/land_cover.py index 0ae9b78..ec30cc4 100644 --- a/src/zampy/datasets/land_cover.py +++ b/src/zampy/datasets/land_cover.py @@ -3,6 +3,7 @@ from pathlib import Path from tempfile import TemporaryDirectory from zipfile import ZipFile +import dask.array import numpy as np import xarray as xr import xarray_regrid @@ -82,6 +83,7 @@ def download( cds_utils.cds_request_land_cover( dataset=self.cds_dataset, 
time_bounds=time_bounds, + spatial_bounds=spatial_bounds, path=download_folder, overwrite=overwrite, ) @@ -134,16 +136,28 @@ def load( ) raise ValueError(msg) files = list((ingest_dir / self.name).glob(f"{self.name}_*.nc")) - ds = xr.open_mfdataset(files, chunks={"latitude": 200, "longitude": 200}) ds = ds.sel(time=slice(time_bounds.start, time_bounds.end)) grid = xarray_regrid.create_regridding_dataset( utils.make_grid(spatial_bounds, resolution) ) - ds = ds.regrid.most_common(grid, time_dim="time", max_mem=1e9) - return ds + ds_regrid = {} + for variable in variable_names: + # select the variable to be regridded + da = ds[variable] + + # get values for most common method + regrid_values = get_unique_values(da) + + da_regrid = da.regrid.most_common(grid, values=regrid_values) + + # make sure dtype is the same + # in the xarray_regrid> v0.4.0, this might not be necessary + ds_regrid[variable] = da_regrid.astype(da.dtype) + + return xr.Dataset(ds_regrid) def convert( self, @@ -207,7 +221,7 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: # only keep land cover class variable with xr.open_dataset(unzip_folder / zipped_file_name) as ds: - var_list = [var for var in ds.data_vars] + var_list = list(ds.data_vars) raw_variable = "lccs_class" var_list.remove(raw_variable) ds = ds.drop_vars(var_list) # noqa: PLW2901 @@ -215,19 +229,29 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: ds = ds.sortby(["lat", "lon"]) # noqa: PLW2901 ds = ds.rename({"lat": "latitude", "lon": "longitude"}) # noqa: PLW2901 new_grid = xarray_regrid.Grid( - north=90, - east=180, - south=-90, - west=-180, + north=ds["latitude"].max().item(), + east=ds["longitude"].max().item(), + south=ds["latitude"].min().item(), + west=ds["longitude"].min().item(), resolution_lat=0.05, resolution_lon=0.05, ) target_dataset = xarray_regrid.create_regridding_dataset(new_grid) - ds_regrid = ds.regrid.most_common( - target_dataset, time_dim="time", max_mem=1e9 - ) + # select the variable to be 
regridded + da = ds[raw_variable] + + # get values for most common method + regrid_values = get_unique_values(da) + + da_regrid = da.regrid.most_common(target_dataset, values=regrid_values) + + # make sure dtype is the same + da_regrid = da_regrid.astype(da.dtype) + + # convert dataarray to dataset + ds_regrid = da_regrid.to_dataset() # rename variable to follow the zampy convention variable_name = "land_cover" @@ -240,3 +264,18 @@ def extract_netcdf_to_zampy(file: Path) -> xr.Dataset: ].desc return ds_regrid + + +def get_unique_values(da: xr.DataArray) -> np.ndarray: + """Get unique values of a land cover DataArray.""" + if "flag_values" in da.attrs: + unique_values = da.attrs["flag_values"] + else: + # Convert to Dask array if not already + if not isinstance(da.data, dask.array.Array): + dask_array = dask.array.from_array(da.values, chunks="auto") + else: + dask_array = da.data + # Use Dask's unique function + unique_values = dask.array.unique(dask_array).compute() + return unique_values diff --git a/src/zampy/datasets/utils.py b/src/zampy/datasets/utils.py index a737baa..1e3a237 100644 --- a/src/zampy/datasets/utils.py +++ b/src/zampy/datasets/utils.py @@ -43,10 +43,13 @@ def download_url(url: str, fpath: Path, overwrite: bool) -> None: print(f"File '{fpath.name}' already exists, skipping...") -def get_url_size(url: str) -> int: +def get_url_size(url: str) -> int | None: """Return the size (bytes) of a given URL.""" response = requests.head(url) - return int(response.headers["Content-Length"]) + content_length = response.headers.get("Content-Length") + if content_length: + return int(content_length) + return None def get_file_size(fpath: Path) -> int: diff --git a/src/zampy/recipe.py b/src/zampy/recipe.py index 63a360b..782363e 100644 --- a/src/zampy/recipe.py +++ b/src/zampy/recipe.py @@ -137,13 +137,19 @@ def run(self) -> None: ds = converter.convert(ds, dataset, convention=self.convention) if "time" in ds.dims: # Dataset with only DEM (e.g.) has no time dim. 
- freq = xr.infer_freq(ds["time"]) - if freq is None: # fallback: - freq = ( + data_freq = None + + if len(ds["time"]) == 1: + data_freq = pd.Timedelta(self.frequency) + elif len(ds["time"]) > 3: # see pandas _FrequencyInferer + freq = xr.infer_freq(ds["time"]) + data_freq = pd.to_timedelta(pd.tseries.frequencies.to_offset(freq)) + + if data_freq is None: # fallback: + data_freq = pd.Timedelta( ds["time"].isel(time=1).to_numpy() - ds["time"].isel(time=0).to_numpy() ) - data_freq = pd.to_timedelta(pd.tseries.frequencies.to_offset(freq)) if data_freq < pd.Timedelta(self.frequency): ds = ds.resample(time=self.frequency).mean() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..2eeadb8 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,34 @@ +"""This module contains all tests for datasets included in zampy.""" + +from pathlib import Path +from zampy.recipe import config_loader +from . import generate_test_data + + +test_folder = Path(__file__).resolve().parents[0] +data_folder = test_folder / "test_data" + +ALL_DAYS = [ + "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", + "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", + "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", + "31", +] # fmt: skip + +ALL_HOURS = [ + "00:00", "01:00", "02:00", "03:00", "04:00", "05:00", "06:00", + "07:00", "08:00", "09:00", "10:00", "11:00", "12:00", "13:00", + "14:00", "15:00", "16:00", "17:00", "18:00", "19:00", "20:00", + "21:00", "22:00", "23:00", +] # fmt: skip + + +if not data_folder.exists(): + # This should be run locally! + # Generate test data if it does not exist + # it assumes that the recipe + # zampy/recipes/STEMMUS_SCOPE_input.yml has been ran. 
+ config = config_loader() + download_dir = Path(config["working_directory"]) / "download" + data_folder.mkdir(parents=True, exist_ok=True) + generate_test_data.generate_test_data(download_dir, data_folder) diff --git a/tests/generate_test_data.py b/tests/generate_test_data.py new file mode 100644 index 0000000..293eb52 --- /dev/null +++ b/tests/generate_test_data.py @@ -0,0 +1,151 @@ +"""Generates test data for running the tests. + +The recipe zampy/recipes/STEMMUS_SCOPE_input.yml should be used to download the +data before running this script. +""" + +import shutil +import xarray as xr + + +TEST_DATA_NAME = { + "cams": "cams_co2_concentration_2020_01_01-2020_02_15.nc", + "era5": [ + "era5_northward_component_of_wind_2020-1.nc", + "era5_eastward_component_of_wind_2020-1.nc", + "era5_total_precipitation_2020-1.nc", + "era5_surface_pressure_2020-1.nc", + "era5_surface_solar_radiation_downwards_2020-1.nc", + "era5_surface_thermal_radiation_downwards_2020-1.nc", + ], + "era5-land": [ + "era5-land_dewpoint_temperature_2020-1.nc", + "era5-land_air_temperature_2020-1.nc", + ], + "eth-canopy-height": "ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif", + "fapar-lai": "satellite-lai-fapar_2020-1.zip", + "land-cover": "land-cover_LCCS_MAP_300m_2020.zip", + "prism-dem-90": "Copernicus_DSM_30_N50_00_E000_00.tar", +} + + +def _subset_ncfile(input_file, output_file): + ds = xr.open_dataset(input_file) + subset = ds.isel( + valid_time=slice(0, min(4, ds.valid_time.size)), + latitude=slice(0, min(4, ds.latitude.size)), + longitude=slice(0, min(4, ds.longitude.size)), + ) + subset.to_netcdf(output_file) + + +def _subset_tiffile(input_file, output_file): + da = xr.open_dataarray(input_file, engine="rasterio", chunks={"x": 200, "y": 200}) + subset = da.isel( + x=slice(0, min(4, da.x.size)), + y=slice(0, min(4, da.y.size)), + ) + subset.rio.to_raster(output_file) + + +def _subset_zipfile_include_ncfiles(input_file, output_dir): + format = input_file.suffix.lstrip(".") + zip_file_name = 
input_file.stem + temp_dir = output_dir / zip_file_name + temp_dir.mkdir(parents=True, exist_ok=True) + + shutil.unpack_archive(input_file, extract_dir=output_dir, format=format) + ncfiles = output_dir.glob("*.nc") + + for ncfile in ncfiles: + ds = xr.open_dataset(ncfile) + # select a subset of the data + subset = ds.isel( + time=slice(0, min(100, ds.time.size)), + lat=slice(0, min(100, ds.lat.size)), + lon=slice(0, min(100, ds.lon.size)), + ) + + # remove the original file + ncfile.unlink() + + # Save back to the original format + subset.to_netcdf(temp_dir / ncfile.name) + + # archive back to zip + base_name = output_dir / zip_file_name + shutil.make_archive(base_name, format, temp_dir) + + # remove the temp_dir + shutil.rmtree(temp_dir) + + +def _subset_tarfile_include_tiffiles(input_file, output_dir): + format = input_file.suffix.lstrip(".") + zip_file_name = input_file.stem + temp_dir = output_dir / zip_file_name + temp_dir.mkdir(parents=True, exist_ok=True) + + shutil.unpack_archive(input_file, extract_dir=temp_dir, format=format) + tifffile = list((temp_dir / zip_file_name / "DEM").glob("*.tif"))[0] + + da = xr.open_dataarray(tifffile, engine="rasterio", chunks={"x": 200, "y": 200}) + + # select a subset of the data + subset = da.isel( + x=slice(0, min(100, da.x.size)), + y=slice(0, min(100, da.y.size)), + ) + + # remove the original file + tifffile.unlink() + + # Save back to the original format as a tif + subset.rio.to_raster(tifffile) + + # archive back to zip + base_name = output_dir / zip_file_name + root_dir = output_dir / zip_file_name + shutil.make_archive(base_name, format, root_dir, zip_file_name) + + # remove the temp_dir + shutil.rmtree(root_dir) + + +def prepare_dataset(path, data_name, output_dir): + """Open a dataset as an xarray dataset.""" + if data_name in {"cams", "era5", "era5-land"}: + _subset_ncfile(path, output_dir / path.name) + + if data_name in {"eth-canopy-height"}: + _subset_tiffile(path, output_dir / path.name) + + if data_name in 
{"fapar-lai", "land-cover"}: + _subset_zipfile_include_ncfiles(path, output_dir) + + if data_name == "prism-dem-90": + _subset_tarfile_include_tiffiles(path, output_dir) + + +def generate_test_data(data_dir, test_dir): + """Generate test data for the recipe tests.""" + + for data_name in TEST_DATA_NAME.keys(): + output_dir = test_dir / data_name + output_dir.mkdir(parents=True, exist_ok=True) + + # copy properties.json + shutil.copy(data_dir / data_name / "properties.json", output_dir) + + # subset data + if isinstance(TEST_DATA_NAME[data_name], list): + for file_name in TEST_DATA_NAME[data_name]: + path = data_dir / data_name / file_name + prepare_dataset(path, data_name, output_dir) + else: + path = data_dir / data_name / TEST_DATA_NAME[data_name] + prepare_dataset(path, data_name, output_dir) + + print(f"Generated test data for {data_name} in {output_dir}") + + print("Done generating test data.") diff --git a/tests/test_cds_utils.py b/tests/test_cds_utils.py index 2fd5e2c..ec44293 100644 --- a/tests/test_cds_utils.py +++ b/tests/test_cds_utils.py @@ -5,12 +5,12 @@ import numpy as np import pytest import xarray as xr -from test_datasets import ALL_DAYS -from test_datasets import ALL_HOURS -from test_datasets import data_folder from zampy.datasets import cds_utils from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds +from . import ALL_DAYS +from . import ALL_HOURS +from . 
import data_folder @pytest.fixture(scope="function") @@ -118,7 +118,7 @@ def test_cds_request_land_cover(mock_retrieve, valid_path_config): """ "Test cds request for downloading data from CDS server.""" dataset = "satellite-land-cover" time_bounds = TimeBounds( - np.datetime64("1996-01-01T00:00:00"), np.datetime64("1996-12-31T00:00:00") + np.datetime64("2020-01-01T00:00:00"), np.datetime64("2020-12-31T00:00:00") ) path = Path(__file__).resolve().parent overwrite = True @@ -130,6 +130,7 @@ def test_cds_request_land_cover(mock_retrieve, valid_path_config): dataset, time_bounds, path, + SpatialBounds(54, 56, 1, 3), overwrite, ) @@ -138,8 +139,9 @@ def test_cds_request_land_cover(mock_retrieve, valid_path_config): { "variable": "all", "format": "zip", - "year": "1996", - "version": "v2.0.7cds", + "year": "2020", + "version": "v2_1_1", + "area": [54, 3, 1, 56], }, ) @@ -183,11 +185,11 @@ def test_convert_to_zampy(dummy_dir): ingest_folder = Path(data_folder, "era5") cds_utils.convert_to_zampy( ingest_folder=Path(dummy_dir), - file=Path(ingest_folder, "era5_northward_component_of_wind_1996-1.nc"), + file=Path(ingest_folder, "era5_northward_component_of_wind_2020-1.nc"), overwrite=True, ) - ds = xr.load_dataset(Path(dummy_dir, "era5_northward_component_of_wind_1996-1.nc")) + ds = xr.open_dataset(Path(dummy_dir, "era5_northward_component_of_wind_2020-1.nc")) assert list(ds.data_vars)[0] == "northward_component_of_wind" @@ -200,7 +202,7 @@ def test_parse_nc_file_10m_wind(self): variables = ["northward_component_of_wind", "eastward_component_of_wind"] for variable in variables: ds = cds_utils.parse_nc_file( - data_folder / "era5" / f"era5_{variable}_1996-1.nc" + data_folder / "era5" / f"era5_{variable}_2020-1.nc" ) expected_var_name = variable assert list(ds.data_vars)[0] == expected_var_name @@ -213,11 +215,11 @@ def test_parse_nc_file_radiation(self): "surface_solar_radiation_downwards": "ssrd", } for variable in variables: - ds_original = xr.load_dataset( - data_folder / 
"era5" / f"era5_{variable}_1996-1.nc" + ds_original = xr.open_dataset( + data_folder / "era5" / f"era5_{variable}_2020-1.nc" ) ds = cds_utils.parse_nc_file( - data_folder / "era5" / f"era5_{variable}_1996-1.nc" + data_folder / "era5" / f"era5_{variable}_2020-1.nc" ) assert list(ds.data_vars)[0] == variable @@ -230,11 +232,11 @@ def test_parse_nc_file_radiation(self): def test_parse_nc_file_precipitation(self): """Test parsing netcdf file function with precipitation.""" - ds_original = xr.load_dataset( - data_folder / "era5" / "era5_total_precipitation_1996-1.nc" + ds_original = xr.open_dataset( + data_folder / "era5" / "era5_total_precipitation_2020-1.nc" ) ds = cds_utils.parse_nc_file( - data_folder / "era5" / "era5_total_precipitation_1996-1.nc" + data_folder / "era5" / "era5_total_precipitation_2020-1.nc" ) expected_var_name = "total_precipitation" @@ -249,7 +251,7 @@ def test_parse_nc_file_precipitation(self): def test_parse_nc_file_pressure(self): """Test parsing netcdf file function with surface pressure.""" ds = cds_utils.parse_nc_file( - data_folder / "era5" / "era5_surface_pressure_1996-1.nc" + data_folder / "era5" / "era5_surface_pressure_2020-1.nc" ) expected_var_name = "surface_pressure" @@ -259,7 +261,7 @@ def test_parse_nc_file_pressure(self): def test_parse_nc_file_air_temperature(self): """Test parsing netcdf file function with 2 meter temperature.""" ds = cds_utils.parse_nc_file( - data_folder / "era5-land" / "era5-land_air_temperature_1996-1.nc" + data_folder / "era5-land" / "era5-land_air_temperature_2020-1.nc" ) expected_var_name = "air_temperature" @@ -269,7 +271,7 @@ def test_parse_nc_file_air_temperature(self): def test_parse_nc_file_dew_temperature(self): """Test parsing netcdf file function with 2 meter dewpoint temperature.""" ds = cds_utils.parse_nc_file( - data_folder / "era5-land" / "era5-land_dewpoint_temperature_1996-1.nc" + data_folder / "era5-land" / "era5-land_dewpoint_temperature_2020-1.nc" ) expected_var_name = 
"dewpoint_temperature" @@ -279,7 +281,7 @@ def test_parse_nc_file_dew_temperature(self): def test_parse_nc_file_co2_concentration(self): """Test parsing netcdf file function with co2 concentration.""" ds = cds_utils.parse_nc_file( - data_folder / "cams" / "cams_co2_concentration_2003_01_02-2003_01_04.nc" + data_folder / "cams" / "cams_co2_concentration_2020_01_01-2020_02_15.nc" ) expected_var_name = "co2_concentration" diff --git a/tests/test_converter.py b/tests/test_converter.py index ca2239c..10bd572 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -4,10 +4,10 @@ import numpy as np import pytest import xarray as xr -from test_datasets import data_folder from zampy.datasets import converter from zampy.datasets.catalog import EthCanopyHeight from zampy.datasets.eth_canopy_height import parse_tiff_file +from . import data_folder path_dummy_data = data_folder / "eth-canopy-height" diff --git a/tests/test_data/cams/cams_co2_concentration_2003_01_02-2003_01_04.nc b/tests/test_data/cams/cams_co2_concentration_2003_01_02-2003_01_04.nc deleted file mode 100644 index 74c810e..0000000 Binary files a/tests/test_data/cams/cams_co2_concentration_2003_01_02-2003_01_04.nc and /dev/null differ diff --git a/tests/test_data/cams/cams_co2_concentration_2020_01_01-2020_02_15.nc b/tests/test_data/cams/cams_co2_concentration_2020_01_01-2020_02_15.nc new file mode 100644 index 0000000..d3b18ed Binary files /dev/null and b/tests/test_data/cams/cams_co2_concentration_2020_01_01-2020_02_15.nc differ diff --git a/tests/test_data/cams/properties.json b/tests/test_data/cams/properties.json index e69de29..01d8b7d 100644 --- a/tests/test_data/cams/properties.json +++ b/tests/test_data/cams/properties.json @@ -0,0 +1,11 @@ +{ + "start_time": "2020-01-01", + "end_time": "2020-02-15", + "north": 60, + "east": 10, + "south": 50, + "west": 0, + "variable_names": [ + "co2_concentration" + ] +} \ No newline at end of file diff --git 
a/tests/test_data/era5-land/era5-land_air_temperature_1996-1.nc b/tests/test_data/era5-land/era5-land_air_temperature_1996-1.nc deleted file mode 100644 index 278c95a..0000000 Binary files a/tests/test_data/era5-land/era5-land_air_temperature_1996-1.nc and /dev/null differ diff --git a/tests/test_data/era5-land/era5-land_air_temperature_2020-1.nc b/tests/test_data/era5-land/era5-land_air_temperature_2020-1.nc new file mode 100644 index 0000000..4df0eb2 Binary files /dev/null and b/tests/test_data/era5-land/era5-land_air_temperature_2020-1.nc differ diff --git a/tests/test_data/era5-land/era5-land_dewpoint_temperature_1996-1.nc b/tests/test_data/era5-land/era5-land_dewpoint_temperature_1996-1.nc deleted file mode 100644 index c7db40b..0000000 Binary files a/tests/test_data/era5-land/era5-land_dewpoint_temperature_1996-1.nc and /dev/null differ diff --git a/tests/test_data/era5-land/era5-land_dewpoint_temperature_2020-1.nc b/tests/test_data/era5-land/era5-land_dewpoint_temperature_2020-1.nc new file mode 100644 index 0000000..6a2322e Binary files /dev/null and b/tests/test_data/era5-land/era5-land_dewpoint_temperature_2020-1.nc differ diff --git a/tests/test_data/era5-land/properties.json b/tests/test_data/era5-land/properties.json index e69de29..dd3a0ce 100644 --- a/tests/test_data/era5-land/properties.json +++ b/tests/test_data/era5-land/properties.json @@ -0,0 +1,11 @@ +{ + "start_time": "2020-01-01", + "end_time": "2020-02-15", + "north": 60, + "east": 10, + "south": 50, + "west": 0, + "variable_names": [ + "soil_moisture" + ] +} \ No newline at end of file diff --git a/tests/test_data/era5/era5_eastward_component_of_wind_1996-1.nc b/tests/test_data/era5/era5_eastward_component_of_wind_1996-1.nc deleted file mode 100644 index 202dfb3..0000000 Binary files a/tests/test_data/era5/era5_eastward_component_of_wind_1996-1.nc and /dev/null differ diff --git a/tests/test_data/era5/era5_eastward_component_of_wind_2020-1.nc 
b/tests/test_data/era5/era5_eastward_component_of_wind_2020-1.nc new file mode 100644 index 0000000..f6043c3 Binary files /dev/null and b/tests/test_data/era5/era5_eastward_component_of_wind_2020-1.nc differ diff --git a/tests/test_data/era5/era5_northward_component_of_wind_1996-1.nc b/tests/test_data/era5/era5_northward_component_of_wind_1996-1.nc deleted file mode 100644 index f5ba129..0000000 Binary files a/tests/test_data/era5/era5_northward_component_of_wind_1996-1.nc and /dev/null differ diff --git a/tests/test_data/era5/era5_northward_component_of_wind_2020-1.nc b/tests/test_data/era5/era5_northward_component_of_wind_2020-1.nc new file mode 100644 index 0000000..6a7a11d Binary files /dev/null and b/tests/test_data/era5/era5_northward_component_of_wind_2020-1.nc differ diff --git a/tests/test_data/era5/era5_surface_pressure_1996-1.nc b/tests/test_data/era5/era5_surface_pressure_1996-1.nc deleted file mode 100644 index a69a103..0000000 Binary files a/tests/test_data/era5/era5_surface_pressure_1996-1.nc and /dev/null differ diff --git a/tests/test_data/era5/era5_surface_pressure_2020-1.nc b/tests/test_data/era5/era5_surface_pressure_2020-1.nc new file mode 100644 index 0000000..565d408 Binary files /dev/null and b/tests/test_data/era5/era5_surface_pressure_2020-1.nc differ diff --git a/tests/test_data/era5/era5_surface_solar_radiation_downwards_1996-1.nc b/tests/test_data/era5/era5_surface_solar_radiation_downwards_1996-1.nc deleted file mode 100644 index 559909f..0000000 Binary files a/tests/test_data/era5/era5_surface_solar_radiation_downwards_1996-1.nc and /dev/null differ diff --git a/tests/test_data/era5/era5_surface_solar_radiation_downwards_2020-1.nc b/tests/test_data/era5/era5_surface_solar_radiation_downwards_2020-1.nc new file mode 100644 index 0000000..af3f20c Binary files /dev/null and b/tests/test_data/era5/era5_surface_solar_radiation_downwards_2020-1.nc differ diff --git a/tests/test_data/era5/era5_surface_thermal_radiation_downwards_1996-1.nc 
b/tests/test_data/era5/era5_surface_thermal_radiation_downwards_1996-1.nc deleted file mode 100644 index 2528c69..0000000 Binary files a/tests/test_data/era5/era5_surface_thermal_radiation_downwards_1996-1.nc and /dev/null differ diff --git a/tests/test_data/era5/era5_surface_thermal_radiation_downwards_2020-1.nc b/tests/test_data/era5/era5_surface_thermal_radiation_downwards_2020-1.nc new file mode 100644 index 0000000..ab183f8 Binary files /dev/null and b/tests/test_data/era5/era5_surface_thermal_radiation_downwards_2020-1.nc differ diff --git a/tests/test_data/era5/era5_total_precipitation_1996-1.nc b/tests/test_data/era5/era5_total_precipitation_1996-1.nc deleted file mode 100644 index d0018cc..0000000 Binary files a/tests/test_data/era5/era5_total_precipitation_1996-1.nc and /dev/null differ diff --git a/tests/test_data/era5/era5_total_precipitation_2020-1.nc b/tests/test_data/era5/era5_total_precipitation_2020-1.nc new file mode 100644 index 0000000..71fa96b Binary files /dev/null and b/tests/test_data/era5/era5_total_precipitation_2020-1.nc differ diff --git a/tests/test_data/era5/properties.json b/tests/test_data/era5/properties.json index e69de29..345283b 100644 --- a/tests/test_data/era5/properties.json +++ b/tests/test_data/era5/properties.json @@ -0,0 +1,16 @@ +{ + "start_time": "2020-01-01", + "end_time": "2020-02-15", + "north": 60, + "east": 10, + "south": 50, + "west": 0, + "variable_names": [ + "total_precipitation", + "surface_thermal_radiation_downwards", + "surface_solar_radiation_downwards", + "surface_pressure", + "eastward_component_of_wind", + "northward_component_of_wind" + ] +} \ No newline at end of file diff --git a/tests/test_data/eth-canopy-height/ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif b/tests/test_data/eth-canopy-height/ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif index c30681b..759ab1d 100644 Binary files a/tests/test_data/eth-canopy-height/ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif and 
b/tests/test_data/eth-canopy-height/ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.tif differ diff --git a/tests/test_data/eth-canopy-height/properties.json b/tests/test_data/eth-canopy-height/properties.json index e69de29..ae14f17 100644 --- a/tests/test_data/eth-canopy-height/properties.json +++ b/tests/test_data/eth-canopy-height/properties.json @@ -0,0 +1,11 @@ +{ + "start_time": "2020-01-01", + "end_time": "2020-02-15", + "north": 60, + "east": 10, + "south": 50, + "west": 0, + "variable_names": [ + "height_of_vegetation" + ] +} \ No newline at end of file diff --git a/tests/test_data/fapar-lai/download/fapar-lai/properties.json b/tests/test_data/fapar-lai/download/fapar-lai/properties.json deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_data/fapar-lai/download/fapar-lai/satellite-lai-fapar_2019-1.zip b/tests/test_data/fapar-lai/download/fapar-lai/satellite-lai-fapar_2019-1.zip deleted file mode 100644 index 98f3b28..0000000 Binary files a/tests/test_data/fapar-lai/download/fapar-lai/satellite-lai-fapar_2019-1.zip and /dev/null differ diff --git a/tests/test_data/fapar-lai/generate_fake_data.py b/tests/test_data/fapar-lai/generate_fake_data.py deleted file mode 100644 index a42c475..0000000 --- a/tests/test_data/fapar-lai/generate_fake_data.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Generate fake data for the fapar-lai tests.""" - -import zipfile -from pathlib import Path -import numpy as np -import xarray as xr - - -if __name__ == "__main__": - download_path = Path("download") - ingest_path = Path("ingest") - download_path.mkdir(exist_ok=True) - ingest_path.mkdir(exist_ok=True) - - # Generate raw NC files: - latitude = np.linspace(80, -59.99107142876241, num=15680, endpoint=True) - longitude = np.linspace(-180, 180, num=40320, endpoint=True) - dummy_data = np.ones((1, len(latitude), len(longitude))) - encoding = {"LAI": {"zlib": True, "complevel": 8}} - days = [10, 20, 31] - - fnames: list[Path] = [] - for day in days: - time = 
np.array([np.datetime64(f"2019-01-{day}")]) - - da = xr.DataArray( - data=dummy_data, - dims=[ - "time", - "lat", - "lon", - ], - coords={"time": time, "lat": latitude, "lon": longitude}, - ) - - da.name = "LAI" - da.attrs = { - "long_name": "Effective Leaf Area Index 1km", - "grid_mapping": "crs", - "standard_name": "leaf_area_index", - "units": "fraction", - "valid_range": np.array([0, 65534], dtype=np.uint16), - } - ds = da.to_dataset() - - fname = f"c3s_LAI_201901{day}000000_GLOBE_PROBAV_V3.0.1.nc" - - ds.to_netcdf(fname, encoding=encoding) - fnames.append(Path(fname)) - - # Zip em up - zip_fname = "satellite-lai-fapar_2019-1.zip" - with zipfile.ZipFile(download_path / zip_fname, mode="w") as zipf: - for fname in fnames: - zipf.write(fname) - - # Clean up raw nc files - for fname in fnames: - fname.unlink() - - # Generate ingested nc files: - encoding = {"leaf_area_index": {"zlib": True, "complevel": 8}} - for day in days: - time = np.array([np.datetime64(f"2019-01-{day}")]) - - da = xr.DataArray( - data=dummy_data, - dims=[ - "time", - "latitude", - "longitude", - ], - coords={"time": time, "latitude": latitude, "longitude": longitude}, - ) - - da.name = "leaf_area_index" - da.attrs = { - "long_name": "Effective Leaf Area Index 1km", - "grid_mapping": "crs", - "standard_name": "leaf_area_index", - "units": "fraction", - "valid_range": np.array([0, 65534], dtype=np.uint16), - } - ds = da.to_dataset() - - fname = f"c3s_LAI_201901{day}000000_GLOBE_PROBAV_V3.0.1.nc" - ds.to_netcdf(ingest_path / fname, encoding=encoding) diff --git a/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190110000000_GLOBE_PROBAV_V3.0.1.nc b/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190110000000_GLOBE_PROBAV_V3.0.1.nc deleted file mode 100644 index 3a257d6..0000000 Binary files a/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190110000000_GLOBE_PROBAV_V3.0.1.nc and /dev/null differ diff --git 
a/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190120000000_GLOBE_PROBAV_V3.0.1.nc b/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190120000000_GLOBE_PROBAV_V3.0.1.nc deleted file mode 100644 index fb42750..0000000 Binary files a/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190120000000_GLOBE_PROBAV_V3.0.1.nc and /dev/null differ diff --git a/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190131000000_GLOBE_PROBAV_V3.0.1.nc b/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190131000000_GLOBE_PROBAV_V3.0.1.nc deleted file mode 100644 index e755148..0000000 Binary files a/tests/test_data/fapar-lai/ingest/fapar-lai/c3s_LAI_20190131000000_GLOBE_PROBAV_V3.0.1.nc and /dev/null differ diff --git a/tests/test_data/fapar-lai/ingest/fapar-lai/properties.json b/tests/test_data/fapar-lai/ingest/fapar-lai/properties.json deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_data/fapar-lai/properties.json b/tests/test_data/fapar-lai/properties.json new file mode 100644 index 0000000..4322911 --- /dev/null +++ b/tests/test_data/fapar-lai/properties.json @@ -0,0 +1,11 @@ +{ + "start_time": "2020-01-01", + "end_time": "2020-02-15", + "north": 60, + "east": 10, + "south": 50, + "west": 0, + "variable_names": [ + "leaf_area_index" + ] +} \ No newline at end of file diff --git a/tests/test_data/fapar-lai/satellite-lai-fapar_2020-1.zip b/tests/test_data/fapar-lai/satellite-lai-fapar_2020-1.zip new file mode 100644 index 0000000..98dc97f Binary files /dev/null and b/tests/test_data/fapar-lai/satellite-lai-fapar_2020-1.zip differ diff --git a/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip b/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip deleted file mode 100644 index 2438560..0000000 Binary files a/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_1996.zip and /dev/null differ diff --git a/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_2020.zip 
b/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_2020.zip new file mode 100644 index 0000000..64d29ea Binary files /dev/null and b/tests/test_data/land-cover/land-cover_LCCS_MAP_300m_2020.zip differ diff --git a/tests/test_data/land-cover/properties.json b/tests/test_data/land-cover/properties.json index e69de29..c9e78a1 100644 --- a/tests/test_data/land-cover/properties.json +++ b/tests/test_data/land-cover/properties.json @@ -0,0 +1,11 @@ +{ + "start_time": "2020-01-01", + "end_time": "2020-02-15", + "north": 60, + "east": 10, + "south": 50, + "west": 0, + "variable_names": [ + "land_cover" + ] +} \ No newline at end of file diff --git a/tests/test_data/prism-dem-90/Copernicus_DSM_30_N50_00_E000_00.tar b/tests/test_data/prism-dem-90/Copernicus_DSM_30_N50_00_E000_00.tar new file mode 100644 index 0000000..2045185 Binary files /dev/null and b/tests/test_data/prism-dem-90/Copernicus_DSM_30_N50_00_E000_00.tar differ diff --git a/tests/test_data/prism-dem-90/Copernicus_DSM_30_N53_00_E004_00.tar b/tests/test_data/prism-dem-90/Copernicus_DSM_30_N53_00_E004_00.tar deleted file mode 100644 index 5ab1d74..0000000 Binary files a/tests/test_data/prism-dem-90/Copernicus_DSM_30_N53_00_E004_00.tar and /dev/null differ diff --git a/tests/test_data/prism-dem-90/properties.json b/tests/test_data/prism-dem-90/properties.json index e69de29..78e1626 100644 --- a/tests/test_data/prism-dem-90/properties.json +++ b/tests/test_data/prism-dem-90/properties.json @@ -0,0 +1,11 @@ +{ + "start_time": "2020-01-01", + "end_time": "2020-02-15", + "north": 60, + "east": 10, + "south": 50, + "west": 0, + "variable_names": [ + "elevation" + ] +} \ No newline at end of file diff --git a/tests/test_datasets/__init__.py b/tests/test_datasets/__init__.py index 596750e..8928a10 100644 --- a/tests/test_datasets/__init__.py +++ b/tests/test_datasets/__init__.py @@ -1,22 +1 @@ """This module contains all tests for datasets included in zampy.""" - -from pathlib import Path - - -test_folder = 
Path(__file__).resolve().parents[1] -data_folder = test_folder / "test_data" - - -ALL_DAYS = [ - "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", - "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", - "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", - "31", -] # fmt: skip - -ALL_HOURS = [ - "00:00", "01:00", "02:00", "03:00", "04:00", "05:00", "06:00", - "07:00", "08:00", "09:00", "10:00", "11:00", "12:00", "13:00", - "14:00", "15:00", "16:00", "17:00", "18:00", "19:00", "20:00", - "21:00", "22:00", "23:00", -] # fmt: skip diff --git a/tests/test_datasets/test_cams.py b/tests/test_datasets/test_cams.py index 5afaba7..bbd67a4 100644 --- a/tests/test_datasets/test_cams.py +++ b/tests/test_datasets/test_cams.py @@ -6,10 +6,10 @@ import numpy as np import pytest import xarray as xr +from tests import data_folder from zampy.datasets.catalog import CAMS from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds -from . import data_folder @pytest.fixture(scope="function") @@ -37,7 +37,7 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): Here we mock the downloading and save property file to a fake path. 
""" times = TimeBounds(np.datetime64("2003-01-02"), np.datetime64("2003-01-04")) - bbox = SpatialBounds(54, 56, 1, 3) + bbox = SpatialBounds(60, 10, 50, 0) variable = ["co2_concentration"] cds_var_names = ["carbon_dioxide"] download_dir = Path(dummy_dir, "download") @@ -84,31 +84,32 @@ def ingest_dummy_data(self, temp_dir): """Ingest dummy tif data to nc for other tests.""" cams_dataset = CAMS() cams_dataset.ingest(download_dir=data_folder, ingest_dir=Path(temp_dir)) - ds = xr.load_dataset( + + return cams_dataset + + def test_ingest(self, dummy_dir): + """Test ingest function.""" + _ = self.ingest_dummy_data(dummy_dir) + ds = xr.open_dataset( Path( - temp_dir, + dummy_dir, "cams", - "cams_co2_concentration_2003_01_02-2003_01_04.nc", + "cams_co2_concentration_2020_01_01-2020_02_15.nc", ) ) - return ds, cams_dataset - - def test_ingest(self, dummy_dir): - """Test ingest function.""" - ds, _ = self.ingest_dummy_data(dummy_dir) assert isinstance(ds, xr.Dataset) - def test_load(self): + def test_load(self, dummy_dir): """Test load function.""" - times = TimeBounds(np.datetime64("2003-01-02"), np.datetime64("2003-01-04")) - bbox = SpatialBounds(39, -107, 37, -109) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-04")) + bbox = SpatialBounds(59.75, 2.25, 57.5, 0) variable = ["co2_concentration"] - cams_dataset = CAMS() + cams_dataset = self.ingest_dummy_data(dummy_dir) ds = cams_dataset.load( - ingest_dir=Path(data_folder), + ingest_dir=Path(dummy_dir), time_bounds=times, spatial_bounds=bbox, variable_names=variable, @@ -116,14 +117,14 @@ def test_load(self): ) # we assert the regridded coordinates - expected_lat = [37.0, 38.0, 39.0] - expected_lon = [-109.0, -108.0, -107.0] + expected_lat = [57.5, 58.5, 59.5] + expected_lon = [0.0, 1.0, 2.0] np.testing.assert_allclose(ds.latitude.values, expected_lat) np.testing.assert_allclose(ds.longitude.values, expected_lon) def test_convert(self, dummy_dir): """Test convert function.""" - _, cams_dataset = 
self.ingest_dummy_data(dummy_dir) + cams_dataset = self.ingest_dummy_data(dummy_dir) cams_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") # TODO: finish this test when the function is complete. diff --git a/tests/test_datasets/test_era5.py b/tests/test_datasets/test_era5.py index 7e9945e..374ef44 100644 --- a/tests/test_datasets/test_era5.py +++ b/tests/test_datasets/test_era5.py @@ -6,12 +6,12 @@ import numpy as np import pytest import xarray as xr +from tests import ALL_DAYS +from tests import ALL_HOURS +from tests import data_folder from zampy.datasets.catalog import ERA5 from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds -from . import ALL_DAYS -from . import ALL_HOURS -from . import data_folder @pytest.fixture(scope="function") @@ -38,8 +38,8 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): """Test download functionality. Here we mock the downloading and save property file to a fake path. 
""" - times = TimeBounds(np.datetime64("2010-01-01"), np.datetime64("2010-01-31")) - bbox = SpatialBounds(54, 56, 1, 3) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-02-15")) + bbox = SpatialBounds(60, 10, 50, 0) variable = ["eastward_component_of_wind"] cds_var_names = ["10m_u_component_of_wind"] download_dir = Path(dummy_dir, "download") @@ -62,7 +62,7 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): { "product_type": "reanalysis", "variable": cds_var_names, - "year": "2010", + "year": "2020", "month": "1", # fmt: off "day": ALL_DAYS, @@ -90,46 +90,49 @@ def ingest_dummy_data(self, temp_dir): """Ingest dummy tif data to nc for other tests.""" era5_dataset = ERA5() era5_dataset.ingest(download_dir=data_folder, ingest_dir=Path(temp_dir)) - ds = xr.load_dataset( - Path( - temp_dir, - "era5", - "era5_northward_component_of_wind_1996-1.nc", - ) - ) - return ds, era5_dataset + return era5_dataset def test_ingest(self, dummy_dir): """Test ingest function.""" - ds, _ = self.ingest_dummy_data(dummy_dir) + _ = self.ingest_dummy_data(dummy_dir) + ds = xr.open_dataset( + Path( + dummy_dir, + "era5", + "era5_northward_component_of_wind_2020-1.nc", + ) + ) assert isinstance(ds, xr.Dataset) - def test_load(self): + def test_load(self, dummy_dir): """Test load function.""" - times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-01-02")) - bbox = SpatialBounds(39, -107, 37, -109) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-04")) + bbox = SpatialBounds(60.0, 0.3, 59.7, 0.0) variable = ["northward_component_of_wind"] - era5_dataset = ERA5() + era5_dataset = self.ingest_dummy_data(dummy_dir) ds = era5_dataset.load( - ingest_dir=Path(data_folder), + ingest_dir=Path(dummy_dir), time_bounds=times, spatial_bounds=bbox, variable_names=variable, - resolution=1.0, + resolution=0.1, ) # we assert the regridded coordinates - expected_lat = [37.0, 38.0, 39.0] - expected_lon = [-109.0, -108.0, -107.0] + 
expected_lat = [59.7, 59.8, 59.9] + expected_lon = [0.0, 0.1, 0.2] np.testing.assert_allclose(ds.latitude.values, expected_lat) np.testing.assert_allclose(ds.longitude.values, expected_lon) + # check if valid_time not in the dataset + assert "valid_time" not in ds.dims + def test_convert(self, dummy_dir): """Test convert function.""" - _, era5_dataset = self.ingest_dummy_data(dummy_dir) + era5_dataset = self.ingest_dummy_data(dummy_dir) era5_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") # TODO: finish this test when the function is complete. diff --git a/tests/test_datasets/test_era5_land.py b/tests/test_datasets/test_era5_land.py index 74376c0..43179ce 100644 --- a/tests/test_datasets/test_era5_land.py +++ b/tests/test_datasets/test_era5_land.py @@ -6,12 +6,12 @@ import numpy as np import pytest import xarray as xr +from tests import ALL_DAYS +from tests import ALL_HOURS +from tests import data_folder from zampy.datasets.catalog import ERA5Land from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds -from . import ALL_DAYS -from . import ALL_HOURS -from . import data_folder @pytest.fixture(scope="function") @@ -38,8 +38,8 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): """Test download functionality. Here we mock the downloading and save property file to a fake path. 
""" - times = TimeBounds(np.datetime64("2010-01-01"), np.datetime64("2010-01-31")) - bbox = SpatialBounds(54, 56, 1, 3) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-02-15")) + bbox = SpatialBounds(60, 10, 50, 0) variable = ["dewpoint_temperature"] cds_var_names = ["2m_dewpoint_temperature"] download_dir = Path(dummy_dir, "download") @@ -62,7 +62,7 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): { "product_type": "reanalysis", "variable": cds_var_names, - "year": "2010", + "year": "2020", "month": "1", "day": ALL_DAYS, "time": ALL_HOURS, @@ -88,46 +88,49 @@ def ingest_dummy_data(self, temp_dir): """Ingest dummy tif data to nc for other tests.""" era5_land_dataset = ERA5Land() era5_land_dataset.ingest(download_dir=data_folder, ingest_dir=Path(temp_dir)) - ds = xr.load_dataset( - Path( - temp_dir, - "era5-land", - "era5-land_dewpoint_temperature_1996-1.nc", - ) - ) - return ds, era5_land_dataset + return era5_land_dataset def test_ingest(self, dummy_dir): """Test ingest function.""" - ds, _ = self.ingest_dummy_data(dummy_dir) + _ = self.ingest_dummy_data(dummy_dir) + ds = xr.open_dataset( + Path( + dummy_dir, + "era5-land", + "era5-land_dewpoint_temperature_2020-1.nc", + ) + ) assert isinstance(ds, xr.Dataset) - def test_load(self): + def test_load(self, dummy_dir): """Test load function.""" - times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-01-02")) - bbox = SpatialBounds(39, -107, 37, -109) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-04")) + bbox = SpatialBounds(60.0, 0.3, 59.7, 0.0) variable = ["dewpoint_temperature"] - era5_land_dataset = ERA5Land() + era5_land_dataset = self.ingest_dummy_data(dummy_dir) ds = era5_land_dataset.load( - ingest_dir=Path(data_folder), + ingest_dir=Path(dummy_dir), time_bounds=times, spatial_bounds=bbox, variable_names=variable, - resolution=1.0, + resolution=0.1, ) # we assert the regridded coordinates - expected_lat = [37.0, 38.0, 39.0] 
- expected_lon = [-109.0, -108.0, -107.0] + expected_lat = [59.7, 59.8, 59.9] + expected_lon = [0.0, 0.1, 0.2] np.testing.assert_allclose(ds.latitude.values, expected_lat) np.testing.assert_allclose(ds.longitude.values, expected_lon) + # check if valid_time not in the dataset + assert "valid_time" not in ds.dims + def test_convert(self, dummy_dir): """Test convert function.""" - _, era5_land_dataset = self.ingest_dummy_data(dummy_dir) + era5_land_dataset = self.ingest_dummy_data(dummy_dir) era5_land_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") # TODO: finish this test when the function is complete. diff --git a/tests/test_datasets/test_eth_canopy_height.py b/tests/test_datasets/test_eth_canopy_height.py index b8ac145..9852906 100644 --- a/tests/test_datasets/test_eth_canopy_height.py +++ b/tests/test_datasets/test_eth_canopy_height.py @@ -6,10 +6,10 @@ import numpy as np import pytest import xarray as xr +from tests import data_folder from zampy.datasets import eth_canopy_height from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds -from . import data_folder @pytest.fixture(scope="function") @@ -27,8 +27,8 @@ def test_download(self, mock_urlretrieve, dummy_dir): Here we mock the downloading and save property file to a fake path. 
""" - times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-12-31")) - bbox = SpatialBounds(54, 6, 51, 3) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-02-15")) + bbox = SpatialBounds(60, 10, 50, 0) variable = ["height_of_vegetation"] download_dir = Path(dummy_dir, "download") @@ -57,28 +57,28 @@ def ingest_dummy_data(self, temp_dir): canopy_height_dataset.ingest( download_dir=data_folder, ingest_dir=Path(temp_dir) ) - ds = xr.load_dataset( + + return canopy_height_dataset + + def test_ingest(self, dummy_dir): + """Test ingest function.""" + _ = self.ingest_dummy_data(dummy_dir) + ds = xr.open_dataset( Path( - temp_dir, + dummy_dir, "eth-canopy-height", "ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc", ) ) - return ds, canopy_height_dataset - - def test_ingest(self, dummy_dir): - """Test ingest function.""" - ds, _ = self.ingest_dummy_data(dummy_dir) - assert isinstance(ds, xr.Dataset) def test_load(self, dummy_dir): """Test load function.""" - _, canopy_height_dataset = self.ingest_dummy_data(dummy_dir) + canopy_height_dataset = self.ingest_dummy_data(dummy_dir) - times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-12-31")) - bbox = SpatialBounds(54, 6, 51, 3) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-04")) + bbox = SpatialBounds(60.0, 0.3, 59.7, 0.0) variable = ["height_of_vegetation"] ds = canopy_height_dataset.load( @@ -86,19 +86,19 @@ def test_load(self, dummy_dir): time_bounds=times, spatial_bounds=bbox, variable_names=variable, - resolution=1.0, + resolution=0.1, ) # we assert the regridded coordinates - expected_lat = [51.0, 52.0, 53.0, 54.0] - expected_lon = [3.0, 4.0, 5.0, 6.0] + expected_lat = [59.7, 59.8, 59.9] + expected_lon = [0.0, 0.1, 0.2] np.testing.assert_allclose(ds.latitude.values, expected_lat) np.testing.assert_allclose(ds.longitude.values, expected_lon) def test_convert(self, dummy_dir): """Test convert function.""" - _, canopy_height_dataset = 
self.ingest_dummy_data(dummy_dir) + canopy_height_dataset = self.ingest_dummy_data(dummy_dir) canopy_height_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") # TODO: finish this test when the function is complete. @@ -168,7 +168,7 @@ def test_convert_tiff_to_netcdf(dummy_dir): file=dummy_data, ) - ds = xr.load_dataset( + ds = xr.open_dataset( Path(dummy_dir, "ETH_GlobalCanopyHeight_10m_2020_N51E003_Map.nc") ) assert isinstance(ds, xr.Dataset) diff --git a/tests/test_datasets/test_fapar_lai.py b/tests/test_datasets/test_fapar_lai.py index 52e6bf7..3d0ad16 100644 --- a/tests/test_datasets/test_fapar_lai.py +++ b/tests/test_datasets/test_fapar_lai.py @@ -3,14 +3,13 @@ import json from pathlib import Path from unittest.mock import patch -import dask.distributed import numpy as np import pytest import xarray as xr +from tests import data_folder from zampy.datasets.catalog import FaparLAI from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds -from . import data_folder @pytest.fixture(scope="function") @@ -37,8 +36,8 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): """Test download functionality. Here we mock the downloading and save property file to a fake path. 
""" - times = TimeBounds(np.datetime64("2019-01-01"), np.datetime64("2019-01-31")) - bbox = SpatialBounds(54, 56, 1, 3) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-31")) + bbox = SpatialBounds(60, 10, 50, 0) variable = ["leaf_area_index"] download_dir = Path(dummy_dir, "download") @@ -61,12 +60,12 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): "format": "zip", "variable": "lai", "horizontal_resolution": "1km", - "product_version": "V3", + "product_version": "v3", "satellite": "proba", "sensor": "vgt", "month": "01", "nominal_day": ["10", "20", "31"], - "year": "2019", + "year": "2020", "area": [ bbox.north, bbox.west, @@ -84,42 +83,35 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): # check property assert json_dict["variable_names"] == variable - @pytest.mark.slow def test_ingest(self, dummy_dir): """Test ingest function.""" - dask.distributed.Client() - - ingest_dir = Path(dummy_dir) / "ingest" - ingest_dir.mkdir() lai_dataset = FaparLAI() - lai_dataset.ingest( - download_dir=data_folder / "fapar-lai" / "download", ingest_dir=ingest_dir - ) + lai_dataset.ingest(download_dir=data_folder, ingest_dir=dummy_dir) - with xr.open_mfdataset((ingest_dir / "fapar-lai").glob("*.nc")) as ds: - assert isinstance(ds, xr.Dataset) + ds = xr.open_mfdataset((dummy_dir / "fapar-lai").glob("*.nc")) + assert isinstance(ds, xr.Dataset) - @pytest.mark.slow # depends on ingested data being available - def test_load(self): + def test_load(self, dummy_dir): """Test load function.""" - times = TimeBounds(np.datetime64("2019-01-01"), np.datetime64("2019-01-31")) - bbox = SpatialBounds(39, -107, 37, -109) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-04")) + bbox = SpatialBounds(60.0, 0.3, 59.7, 0.0) variable = ["leaf_area_index"] lai_dataset = FaparLAI() + lai_dataset.ingest(download_dir=data_folder, ingest_dir=dummy_dir) ds = lai_dataset.load( - ingest_dir=data_folder / "fapar-lai" / 
"ingest", + ingest_dir=dummy_dir, time_bounds=times, spatial_bounds=bbox, variable_names=variable, - resolution=1.0, + resolution=0.1, ) # we assert the regridded coordinates - expected_lat = [37.0, 38.0, 39.0] - expected_lon = [-109.0, -108.0, -107.0] + expected_lat = [59.7, 59.8, 59.9] + expected_lon = [0.0, 0.1, 0.2] np.testing.assert_allclose(ds.latitude.values, expected_lat) np.testing.assert_allclose(ds.longitude.values, expected_lon) diff --git a/tests/test_datasets/test_land_cover.py b/tests/test_datasets/test_land_cover.py index 5aae0a5..8876251 100644 --- a/tests/test_datasets/test_land_cover.py +++ b/tests/test_datasets/test_land_cover.py @@ -7,10 +7,11 @@ import pytest import xarray as xr import zampy.datasets.land_cover +from tests import data_folder from zampy.datasets.catalog import LandCover from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds -from . import data_folder +from zampy.datasets.land_cover import get_unique_values @pytest.fixture(scope="function") @@ -37,8 +38,8 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): """Test download functionality. Here we mock the downloading and save property file to a fake path. 
""" - times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-12-31")) - bbox = SpatialBounds(54, 56, 1, 3) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-31")) + bbox = SpatialBounds(60, 10, 50, 0) variable = ["land_cover"] download_dir = Path(dummy_dir, "download") @@ -60,8 +61,9 @@ def test_download(self, mock_retrieve, valid_path_config, dummy_dir): { "variable": "all", "format": "zip", - "year": "1996", - "version": "v2.0.7cds", + "year": "2020", + "version": "v2_1_1", + "area": [60, 0, 50, 10], }, ) @@ -77,11 +79,11 @@ def ingest_dummy_data(self, temp_dir): """Ingest dummy zip data to nc for other tests.""" land_cover_dataset = LandCover() land_cover_dataset.ingest(download_dir=data_folder, ingest_dir=Path(temp_dir)) - ds = xr.load_dataset( + ds = xr.open_dataset( Path( temp_dir, "land-cover", - "land-cover_LCCS_MAP_300m_1996.nc", + "land-cover_LCCS_MAP_300m_2020.nc", ) ) @@ -96,27 +98,65 @@ def test_ingest(self, dummy_dir): @pytest.mark.slow def test_load(self, dummy_dir): """Test load function.""" - times = TimeBounds(np.datetime64("1996-01-01"), np.datetime64("1996-12-31")) - bbox = SpatialBounds(39, -107, 37, -109) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-04")) + bbox = SpatialBounds(60.0, 0.3, 59.7, 0.0) variable = ["land_cover"] - _, land_cover_dataset = self.ingest_dummy_data(dummy_dir) + ingest_ds, land_cover_dataset = self.ingest_dummy_data(dummy_dir) ds = land_cover_dataset.load( ingest_dir=Path(dummy_dir), time_bounds=times, spatial_bounds=bbox, variable_names=variable, - resolution=1.0, + resolution=0.1, ) # we assert the regridded coordinates - expected_lat = [37.0, 38.0, 39.0] - expected_lon = [-109.0, -108.0, -107.0] + expected_lat = [59.7, 59.8, 59.9] + expected_lon = [0.0, 0.1, 0.2] np.testing.assert_allclose(ds.latitude.values, expected_lat) np.testing.assert_allclose(ds.longitude.values, expected_lon) + # check if unique values of ds are a subset of ingest_ds + assert 
np.all( + np.isin( + np.unique(ds.land_cover.values), + ingest_ds["land_cover"].attrs["flag_values"], + ) + ) + + def test_land_cover_without_flag_values(self, dummy_dir): + """Test load function.""" + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-04")) + bbox = SpatialBounds(60.0, 0.3, 59.7, 0.0) + variable = ["land_cover"] + + ingest_ds, land_cover_dataset = self.ingest_dummy_data(dummy_dir) + + # store unique values + unique_values = ingest_ds["land_cover"].attrs["flag_values"] + + # remove flag_values + ingest_ds["land_cover"].attrs.pop("flag_values") + + ds = land_cover_dataset.load( + ingest_dir=Path(dummy_dir), + time_bounds=times, + spatial_bounds=bbox, + variable_names=variable, + resolution=0.1, + ) + + # check if unique values of ds are a subset of ingest_ds + assert np.all( + np.isin( + np.unique(ds.land_cover.values), + unique_values, + ) + ) + @pytest.mark.slow def test_convert(self, dummy_dir): """Test convert function.""" @@ -128,7 +168,7 @@ def test_convert(self, dummy_dir): @pytest.mark.slow def test_unzip_raw_to_netcdf(): ds = zampy.datasets.land_cover.extract_netcdf_to_zampy( - data_folder / "land-cover/land-cover_LCCS_MAP_300m_1996.zip" + data_folder / "land-cover/land-cover_LCCS_MAP_300m_2020.zip" ) assert isinstance(ds, xr.Dataset) @@ -137,7 +177,29 @@ def test_unzip_raw_to_netcdf(): def test_extract_netcdf_to_zampy(dummy_dir): zampy.datasets.land_cover.unzip_raw_to_netcdf( Path(dummy_dir), - data_folder / "land-cover/land-cover_LCCS_MAP_300m_1996.zip", + data_folder / "land-cover/land-cover_LCCS_MAP_300m_2020.zip", ) - dataset_path = Path(dummy_dir) / "land-cover_LCCS_MAP_300m_1996.nc" + dataset_path = Path(dummy_dir) / "land-cover_LCCS_MAP_300m_2020.nc" assert dataset_path.exists() + + +def test_get_unique_values(): + """Test get_unique_values function.""" + da = xr.DataArray( + data=np.array([1, 2, 3, 4, 5]), + attrs={"flag_values": np.array([1, 2, 3])}, + ) + assert np.all(get_unique_values(da) == np.array([1, 2, 
3])) + + da = xr.DataArray( + data=np.array([1, 2, 3, 4, 5]), + attrs={}, + ) + assert np.all(get_unique_values(da) == np.array([1, 2, 3, 4, 5])) + + da = xr.DataArray( + data=np.array([1, 2, 3, 4, 5]), + attrs={}, + ) + da = da.chunk() + assert np.all(get_unique_values(da) == np.array([1, 2, 3, 4, 5])) diff --git a/tests/test_datasets/test_prism_dem.py b/tests/test_datasets/test_prism_dem.py index 83b549f..0b07b74 100644 --- a/tests/test_datasets/test_prism_dem.py +++ b/tests/test_datasets/test_prism_dem.py @@ -6,10 +6,10 @@ import numpy as np import pytest import xarray as xr +from tests import data_folder from zampy.datasets import prism_dem from zampy.datasets.dataset_protocol import SpatialBounds from zampy.datasets.dataset_protocol import TimeBounds -from . import data_folder @pytest.fixture(scope="function") @@ -27,8 +27,8 @@ def test_download(self, mock_urlretrieve, dummy_dir): Here we mock the downloading and save property file to a fake path. """ - times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-12-31")) - bbox = SpatialBounds(54, 5, 53, 4) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-31")) + bbox = SpatialBounds(60, 10, 50, 0) variable = ["elevation"] download_dir = Path(dummy_dir, "download") @@ -55,28 +55,28 @@ def ingest_dummy_data(self, temp_dir): """Ingest dummy tif data to nc for other tests.""" prism_dem_dataset = prism_dem.PrismDEM90() prism_dem_dataset.ingest(download_dir=data_folder, ingest_dir=Path(temp_dir)) - ds = xr.load_dataset( - Path( - temp_dir, - "prism-dem-90", - "Copernicus_DSM_30_N53_00_E004_00.nc", - ) - ) - return ds, prism_dem_dataset + return prism_dem_dataset def test_ingest(self, dummy_dir): """Test ingest function.""" - ds, _ = self.ingest_dummy_data(dummy_dir) + _ = self.ingest_dummy_data(dummy_dir) + ds = xr.open_dataset( + Path( + dummy_dir, + "prism-dem-90", + "Copernicus_DSM_30_N50_00_E000_00.nc", + ) + ) assert isinstance(ds, xr.Dataset) def test_load(self, dummy_dir): 
"""Test load function.""" - _, prism_dem_dataset = self.ingest_dummy_data(dummy_dir) + prism_dem_dataset = self.ingest_dummy_data(dummy_dir) - times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-12-31")) - bbox = SpatialBounds(54, 5, 53, 4) + times = TimeBounds(np.datetime64("2020-01-01"), np.datetime64("2020-01-04")) + bbox = SpatialBounds(60.0, 0.3, 59.7, 0.0) variable = ["elevation"] ds = prism_dem_dataset.load( @@ -88,14 +88,14 @@ def test_load(self, dummy_dir): ) # we assert the regridded coordinates - expected_lat = [53.00, 53.25, 53.50, 53.75, 54.0] - expected_lon = [4.00, 4.25, 4.50, 4.75, 5.0] + expected_lat = [59.7, 59.95] + expected_lon = [0.0, 0.25] np.testing.assert_allclose(ds["latitude"].values, expected_lat) np.testing.assert_allclose(ds["longitude"].values, expected_lon) def test_convert(self, dummy_dir): """Test convert function.""" - _, prism_dem_dataset = self.ingest_dummy_data(dummy_dir) + prism_dem_dataset = self.ingest_dummy_data(dummy_dir) prism_dem_dataset.convert(ingest_dir=Path(dummy_dir), convention="ALMA") # TODO: finish this test when the function is complete. diff --git a/tests/test_recipes/recipes/era5_recipe.yml b/tests/test_recipes/recipes/era5_recipe.yml index d2c98af..25a6e5e 100644 --- a/tests/test_recipes/recipes/era5_recipe.yml +++ b/tests/test_recipes/recipes/era5_recipe.yml @@ -12,5 +12,5 @@ download: convert: convention: ALMA - frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword. + frequency: 1h # outputs at 1 hour frequency. Pandas-like freq-keyword. resolution: 0.5 # output resolution in degrees. 
diff --git a/tests/test_recipes/test_simple_recipe.py b/tests/test_recipes/test_simple_recipe.py index e8dfd18..413525b 100644 --- a/tests/test_recipes/test_simple_recipe.py +++ b/tests/test_recipes/test_simple_recipe.py @@ -50,6 +50,151 @@ def test_recipe(tmp_path: Path, mocker): ds = xr.open_mfdataset(str(tmp_path / "output" / "era5_recipe" / "*.nc")) assert all(var in ds.data_vars for var in ["Psurf", "Wind_N"]) + # Check if time frequency is correct + assert ds.time.diff("time").min() == np.timedelta64(1, "h") + + +def test_recipe_with_lower_frequency(tmp_path: Path, mocker): + with ( + patch.object(DATASETS["era5"], "download"), + ): + dask.distributed.Client() + + mocker.patch( + "zampy.recipe.config_loader", + return_value={"working_directory": str(tmp_path.absolute())}, + ) + rm = RecipeManager(RECIPE_FILE.absolute()) + rm.frequency = "6h" # change the frequency of the recipe + + spatial_bounds = SpatialBounds(51, 4, 50, 3) + time_bounds = TimeBounds( + np.datetime64("2020-01-01T00:00"), np.datetime64("2020-01-01T23:59") + ) + variables = ["northward_component_of_wind", "surface_pressure"] + + generate_test_data.generate_era5_files( + directory=tmp_path / "download", + variables=variables, + spatial_bounds=spatial_bounds, + time_bounds=time_bounds, + ) + write_properties_file( + tmp_path / "download" / "era5", spatial_bounds, time_bounds, variables + ) + + rm.run() + + ds = xr.open_mfdataset(str(tmp_path / "output" / "era5_recipe" / "*.nc")) + # check the length of the time dimension, mean values are used + assert len(ds.time) == 4 + + +def test_recipe_with_higher_frequency(tmp_path: Path, mocker): + with ( + patch.object(DATASETS["era5"], "download"), + ): + dask.distributed.Client() + + mocker.patch( + "zampy.recipe.config_loader", + return_value={"working_directory": str(tmp_path.absolute())}, + ) + rm = RecipeManager(RECIPE_FILE.absolute()) + rm.frequency = "30min" # change the frequency of the recipe + + spatial_bounds = SpatialBounds(51, 4, 50, 3) + 
time_bounds = TimeBounds( + np.datetime64("2020-01-01T00:00"), np.datetime64("2020-01-01T23:59") + ) + variables = ["northward_component_of_wind", "surface_pressure"] + + generate_test_data.generate_era5_files( + directory=tmp_path / "download", + variables=variables, + spatial_bounds=spatial_bounds, + time_bounds=time_bounds, + ) + write_properties_file( + tmp_path / "download" / "era5", spatial_bounds, time_bounds, variables + ) + + rm.run() + + ds = xr.open_mfdataset(str(tmp_path / "output" / "era5_recipe" / "*.nc")) + # check the length of the time dimension, data is interpolated + assert len(ds.time) == 47 + + +def test_recipe_with_two_time_values(tmp_path: Path, mocker): + with ( + patch.object(DATASETS["era5"], "download"), + ): + dask.distributed.Client() + + mocker.patch( + "zampy.recipe.config_loader", + return_value={"working_directory": str(tmp_path.absolute())}, + ) + rm = RecipeManager(RECIPE_FILE.absolute()) + + spatial_bounds = SpatialBounds(51, 4, 50, 3) + time_bounds = TimeBounds( + np.datetime64("2020-01-01T00:00"), np.datetime64("2020-01-01T02:00") + ) + variables = ["northward_component_of_wind", "surface_pressure"] + + generate_test_data.generate_era5_files( + directory=tmp_path / "download", + variables=variables, + spatial_bounds=spatial_bounds, + time_bounds=time_bounds, + ) + write_properties_file( + tmp_path / "download" / "era5", spatial_bounds, time_bounds, variables + ) + + rm.run() + + ds = xr.open_mfdataset(str(tmp_path / "output" / "era5_recipe" / "*.nc")) + # check the length of the time dimension + assert len(ds.time) == 2 + + +def test_recipe_with_one_time_values(tmp_path: Path, mocker): + with ( + patch.object(DATASETS["era5"], "download"), + ): + dask.distributed.Client() + + mocker.patch( + "zampy.recipe.config_loader", + return_value={"working_directory": str(tmp_path.absolute())}, + ) + rm = RecipeManager(RECIPE_FILE.absolute()) + + spatial_bounds = SpatialBounds(51, 4, 50, 3) + time_bounds = TimeBounds( + 
np.datetime64("2020-01-01T00:00"), np.datetime64("2020-01-01T00:00") + ) + variables = ["northward_component_of_wind", "surface_pressure"] + + generate_test_data.generate_era5_files( + directory=tmp_path / "download", + variables=variables, + spatial_bounds=spatial_bounds, + time_bounds=time_bounds, + ) + write_properties_file( + tmp_path / "download" / "era5", spatial_bounds, time_bounds, variables + ) + + rm.run() + + ds = xr.open_mfdataset(str(tmp_path / "output" / "era5_recipe" / "*.nc")) + # check the length of the time dimension, should not do interpolation or + # extrapolation in time + assert len(ds.time) == 1 def test_invalid_time_format():