Skip to content

Commit

Permalink
refactor dask.config.set
Browse files Browse the repository at this point in the history
  • Loading branch information
SarahAlidoost committed Feb 2, 2024
1 parent 76264fe commit 6a08b92
Show file tree
Hide file tree
Showing 8 changed files with 9 additions and 35 deletions.
15 changes: 9 additions & 6 deletions PyStemmusScope/forcing_io.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Module for forcing data input and output operations."""
from pathlib import Path
import dask
import hdf5storage
import numpy as np
import xarray as xr
Expand Down Expand Up @@ -130,12 +131,14 @@ def read_forcing_data_global( # noqa:PLR0913 (too many arguments)
Returns:
Dictionary containing the forcing data.
"""
return global_data.collect_datasets(
global_data_dir=global_data_dir,
latlon=(lat, lon),
time_range=(start_time, end_time),
timestep=timestep,
)
# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
with dask.config.set(**{"array.slicing.split_large_chunks": True}): # type: ignore
return global_data.collect_datasets(
global_data_dir=global_data_dir,
latlon=(lat, lon),
time_range=(start_time, end_time),
timestep=timestep,
)


def write_dat_files(data: dict, input_dir: Path):
Expand Down
4 changes: 0 additions & 4 deletions PyStemmusScope/global_data/cams_co2.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
"""Module for loading and validating the CAMS CO2 dataset."""
from pathlib import Path
from typing import Union
import dask
import numpy as np
import xarray as xr
from PyStemmusScope.global_data import utils


# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{"array.slicing.split_large_chunks": True}) # type: ignore

RESOLUTION_CAMS = 0.75 # Resolution of the dataset in degrees


Expand Down
4 changes: 0 additions & 4 deletions PyStemmusScope/global_data/cci_landcover.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
"""Module for loading and validating the ESA CCI land cover dataset."""
from pathlib import Path
from typing import Union
import dask
import numpy as np
import pandas as pd
import xarray as xr
from PyStemmusScope.global_data import utils


# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{"array.slicing.split_large_chunks": True}) # type: ignore

RESOLUTION_CCI = 1 / 360 # Resolution of the dataset in degrees
FILEPATH_LANDCOVER_TABLE = Path(__file__).parent / "assets" / "lccs_to_igbp_table.csv"

Expand Down
4 changes: 0 additions & 4 deletions PyStemmusScope/global_data/copernicus_lai.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
"""Module for loading and validating the Copernicus LAI dataset."""
from pathlib import Path
from typing import Union
import dask
import numpy as np
import xarray as xr
from PyStemmusScope.global_data import utils


# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{"array.slicing.split_large_chunks": True}) # type: ignore

RESOLUTION_LAI = 1 / 112 # Resolution of the LAI dataset in degrees


Expand Down
4 changes: 0 additions & 4 deletions PyStemmusScope/global_data/era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,12 @@
from pathlib import Path
from typing import Literal
from typing import Union
import dask
import numpy as np
import PyStemmusScope.variable_conversion as vc
import xarray as xr
from PyStemmusScope.global_data import utils


# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{"array.slicing.split_large_chunks": True}) # type: ignore

ERA5_VARIABLES = ["u10", "v10", "mtpr", "sp", "ssrd", "strd"]
ERA5LAND_VARIABLES = ["t2m", "d2m"]
RESOLUTION_ERA5 = 0.25 # Resolution in degrees, to find nearest gridpoint.
Expand Down
4 changes: 0 additions & 4 deletions PyStemmusScope/global_data/eth_canopy_height.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@
import gzip
from pathlib import Path
from typing import Union
import dask
import xarray as xr
from PyStemmusScope.global_data import utils


# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{"array.slicing.split_large_chunks": True}) # type: ignore

MAX_DISTANCE = 0.01 # Maximum lat/lon distance to be considered nearby.


Expand Down
4 changes: 0 additions & 4 deletions PyStemmusScope/global_data/prism_dem.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@
import gzip
from pathlib import Path
from typing import Union
import dask
import xarray as xr
from PyStemmusScope.global_data import utils


# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{"array.slicing.split_large_chunks": True}) # type: ignore

MAX_DISTANCE = 0.01 # Maximum lat/lon distance to be considered nearby. Approx 1km.


Expand Down
5 changes: 0 additions & 5 deletions PyStemmusScope/global_data/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
"""Utility functions for the global data IO."""
from typing import Union
import dask
import numpy as np
import xarray as xr


# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{"array.slicing.split_large_chunks": True}) # type: ignore


class MissingDataError(Exception):
"""Error to be raised when requested data is missing."""

Expand Down

0 comments on commit 6a08b92

Please sign in to comment.