Skip to content

Commit

Permalink
fix dask PerformanceWarning: Slicing is producing a large chunk
Browse files Browse the repository at this point in the history
  • Loading branch information
SarahAlidoost committed Feb 2, 2024
1 parent 5b9585c commit fa998c6
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 0 deletions.
3 changes: 3 additions & 0 deletions PyStemmusScope/global_data/cams_co2.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Module for loading and validating the CAMS CO2 dataset."""
from pathlib import Path
from typing import Union
import dask
import numpy as np
import xarray as xr
from PyStemmusScope.global_data import utils

# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{'array.slicing.split_large_chunks': True})

RESOLUTION_CAMS = 0.75 # Resolution of the dataset in degrees

Expand Down
3 changes: 3 additions & 0 deletions PyStemmusScope/global_data/cci_landcover.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Module for loading and validating the ESA CCI land cover dataset."""
from pathlib import Path
from typing import Union
import dask
import numpy as np
import pandas as pd
import xarray as xr
from PyStemmusScope.global_data import utils

# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{'array.slicing.split_large_chunks': True})

RESOLUTION_CCI = 1 / 360 # Resolution of the dataset in degrees
FILEPATH_LANDCOVER_TABLE = Path(__file__).parent / "assets" / "lccs_to_igbp_table.csv"
Expand Down
3 changes: 3 additions & 0 deletions PyStemmusScope/global_data/copernicus_lai.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Module for loading and validating the Copernicus LAI dataset."""
from pathlib import Path
from typing import Union
import dask
import numpy as np
import xarray as xr
from PyStemmusScope.global_data import utils

# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{'array.slicing.split_large_chunks': True})

RESOLUTION_LAI = 1 / 112 # Resolution of the LAI dataset in degrees

Expand Down
3 changes: 3 additions & 0 deletions PyStemmusScope/global_data/era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
from pathlib import Path
from typing import Literal
from typing import Union
import dask
import numpy as np
import PyStemmusScope.variable_conversion as vc
import xarray as xr
from PyStemmusScope.global_data import utils

# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{'array.slicing.split_large_chunks': True})

ERA5_VARIABLES = ["u10", "v10", "mtpr", "sp", "ssrd", "strd"]
ERA5LAND_VARIABLES = ["t2m", "d2m"]
Expand Down
3 changes: 3 additions & 0 deletions PyStemmusScope/global_data/eth_canopy_height.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
import gzip
from pathlib import Path
from typing import Union
import dask
import xarray as xr
from PyStemmusScope.global_data import utils

# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{'array.slicing.split_large_chunks': True})

MAX_DISTANCE = 0.01 # Maximum lat/lon distance to be considered nearby.

Expand Down
3 changes: 3 additions & 0 deletions PyStemmusScope/global_data/prism_dem.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
import gzip
from pathlib import Path
from typing import Union
import dask
import xarray as xr
from PyStemmusScope.global_data import utils

# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{'array.slicing.split_large_chunks': True})

MAX_DISTANCE = 0.01 # Maximum lat/lon distance to be considered nearby. Approx 1km.

Expand Down
3 changes: 3 additions & 0 deletions PyStemmusScope/global_data/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
"""Utility functions for the global data IO."""
from typing import Union
import dask
import numpy as np
import xarray as xr

# see https://docs.dask.org/en/latest/array-slicing.html#efficiency
dask.config.set(**{'array.slicing.split_large_chunks': True})

class MissingDataError(Exception):
"""Error to be raised when requested data is missing."""
Expand Down

0 comments on commit fa998c6

Please sign in to comment.