Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setting chunks auto in open_mfdataset #95

Merged
9 commits merged on Feb 2, 2024
17 changes: 10 additions & 7 deletions PyStemmusScope/forcing_io.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Module for forcing data input and output operations."""
from pathlib import Path
import dask
import hdf5storage
import numpy as np
import xarray as xr
Expand Down Expand Up @@ -114,7 +115,7 @@ def read_forcing_data_global( # noqa:PLR0913 (too many arguments)
lon: float,
start_time: np.datetime64,
end_time: np.datetime64,
timestep: str = "1800S",
timestep: str = "1800s",
) -> dict:
"""Read forcing data for a certain location, based on global datasets.

Expand All @@ -130,12 +131,14 @@ def read_forcing_data_global( # noqa:PLR0913 (too many arguments)
Returns:
Dictionary containing the forcing data.
"""
return global_data.collect_datasets(
global_data_dir=global_data_dir,
latlon=(lat, lon),
time_range=(start_time, end_time),
timestep=timestep,
)
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
with dask.config.set(**{"array.slicing.split_large_chunks": True}): # type: ignore
return global_data.collect_datasets(
global_data_dir=global_data_dir,
latlon=(lat, lon),
time_range=(start_time, end_time),
timestep=timestep,
)


def write_dat_files(data: dict, input_dir: Path):
Expand Down
6 changes: 3 additions & 3 deletions PyStemmusScope/global_data/cams_co2.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def retrieve_co2_data(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
DataArray containing the CO2 at the specified site for the given time range.
Expand Down Expand Up @@ -55,12 +55,12 @@ def extract_cams_data(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
DataArray containing the CO2 concentration.
"""
ds = xr.open_mfdataset(files_cams)
ds = xr.open_mfdataset(files_cams, chunks="auto")

check_cams_dataset(cams_data=ds, latlon=latlon, time_range=time_range)

Expand Down
6 changes: 3 additions & 3 deletions PyStemmusScope/global_data/cci_landcover.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def retrieve_landcover_data(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
Dictionary containing IGBP and LCCS land cover classes.
Expand Down Expand Up @@ -57,12 +57,12 @@ def extract_landcover_data(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
Dictionary containing IGBP and LCCS land cover classes.
"""
cci_dataset = xr.open_mfdataset(files_cci)
cci_dataset = xr.open_mfdataset(files_cci, chunks="auto")

check_cci_dataset(cci_dataset, latlon, time_range) # Assert spatial/temporal bounds

Expand Down
6 changes: 3 additions & 3 deletions PyStemmusScope/global_data/copernicus_lai.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def retrieve_lai_data(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
DataArray containing the LAI of the specified site for the given time range.
Expand Down Expand Up @@ -55,12 +55,12 @@ def extract_lai_data(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
DataArray containing the LAI of the specified site for the given time range.
"""
ds = xr.open_mfdataset(files_lai)
ds = xr.open_mfdataset(files_lai, chunks="auto")

check_lai_dataset(ds, latlon, time_range)

Expand Down
9 changes: 5 additions & 4 deletions PyStemmusScope/global_data/era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def retrieve_era5_data(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
Dictionary containing the variables extracted from ERA5.
Expand Down Expand Up @@ -68,7 +68,7 @@ def load_era5_data(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
Dictionary containing the variables extracted from ERA5.
Expand Down Expand Up @@ -117,14 +117,15 @@ def get_era5_dataset(
name: Either "ERA5" or "ERA5-land".
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
The ERA5 or ERA5-land dataset.
"""
tol = RESOLUTION_ERA5 if name == "ERA5" else RESOLUTION_ERA5LAND

ds = xr.open_mfdataset(files)
ds = xr.open_mfdataset(files, chunks="auto")

check_era5_dataset(ds, name, latlon, time_range)

try:
Expand Down
2 changes: 1 addition & 1 deletion PyStemmusScope/global_data/global_data_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def collect_datasets(
latlon: Latitude and longitude of the site.
time_range: Start and end time of the model run.
timestep: Desired timestep of the model, this is derived from the forcing data.
In a pandas-timedelta compatible format. For example: "1800S"
In a pandas-timedelta compatible format. For example: "1800s"

Returns:
Dictionary containing the variables extracted from the global datasets.
Expand Down
Loading