diff --git a/mapchete/config.py b/mapchete/config.py index 68e4ecb7..0fc6992c 100644 --- a/mapchete/config.py +++ b/mapchete/config.py @@ -57,7 +57,7 @@ class OutputConfigBase(BaseModel): metatiling: Union[int, None] = 1 pixelbuffer: Union[NonNegativeInt, None] = 0 - @field_validator("metatiling", mode='before') + @field_validator("metatiling", mode="before") def _metatiling(cls, value: int) -> int: # pragma: no cover _metatiling_opts = [2**x for x in range(10)] if value not in _metatiling_opts: @@ -70,7 +70,7 @@ class PyramidConfig(BaseModel): metatiling: Union[int, None] = 1 pixelbuffer: Union[NonNegativeInt, None] = 0 - @field_validator("metatiling", mode='before') + @field_validator("metatiling", mode="before") def _metatiling(cls, value: int) -> int: # pragma: no cover _metatiling_opts = [2**x for x in range(10)] if value not in _metatiling_opts: @@ -91,7 +91,6 @@ class ProcessConfig(BaseModel, arbitrary_types_allowed=True): bounds: Union[Bounds, Tuple[float, float, float, float], None] = None bounds_crs: Union[dict, str, None] = None process_parameters: Union[dict, None] = None - mapchete_file: Union[str, MPath, None] = None _RESERVED_PARAMETERS = tuple(ProcessConfig.model_fields.keys()) diff --git a/mapchete/io/__init__.py b/mapchete/io/__init__.py index 769d3e97..7f5cea93 100644 --- a/mapchete/io/__init__.py +++ b/mapchete/io/__init__.py @@ -9,7 +9,7 @@ tile_to_zoom_level, ) from mapchete.io.raster import rasterio_open -from mapchete.io.settings import GDAL_HTTP_OPTS +from mapchete.io.settings import GDALHTTPOptions from mapchete.io.vector import fiona_open from mapchete.path import ( MPath, @@ -25,7 +25,7 @@ __all__ = [ "copy", "fs_from_path", - "GDAL_HTTP_OPTS", + "GDALHTTPOptions", "get_best_zoom_level", "get_segmentize_value", "tile_to_zoom_level", diff --git a/mapchete/io/raster.py b/mapchete/io/raster.py index 30fde218..bbb715ff 100644 --- a/mapchete/io/raster.py +++ b/mapchete/io/raster.py @@ -26,7 +26,7 @@ from mapchete._timer import Timer from mapchete.errors import MapcheteIOError from mapchete.io import copy -from mapchete.io.settings import MAPCHETE_IO_RETRY_SETTINGS +from mapchete.io.settings import IORetrySettings from mapchete.path import MPath, fs_from_path from mapchete.tile import BufferedTile from mapchete.validate import validate_write_window_params @@ -516,7 +516,7 @@ def _get_warped_array( raise -@retry(logger=logger, exceptions=RasterioIOError, **MAPCHETE_IO_RETRY_SETTINGS) +@retry(logger=logger, exceptions=RasterioIOError, **dict(IORetrySettings())) def _rasterio_read( input_file=None, indexes=None, @@ -621,7 +621,7 @@ def read_raster_no_crs(input_file, indexes=None, gdal_opts=None): FileNotFoundError if file cannot be found. """ - @retry(logger=logger, exceptions=RasterioIOError, **MAPCHETE_IO_RETRY_SETTINGS) + @retry(logger=logger, exceptions=RasterioIOError, **dict(IORetrySettings())) def _read(): with warnings.catch_warnings(): warnings.simplefilter("ignore") diff --git a/mapchete/io/settings.py b/mapchete/io/settings.py index 4c175b2d..9825ca51 100644 --- a/mapchete/io/settings.py +++ b/mapchete/io/settings.py @@ -1,40 +1,38 @@ -import logging -import os +""" +Combine default values with environment variable values. +""" -logger = logging.getLogger(__name__) - - -def _merge_gdal_defaults_with_env(): - return {k: os.environ.get(k, v) for k, v in GDAL_HTTP_DEFAULTS.items()} +from pydantic_settings import BaseSettings, SettingsConfigDict # defaults sets according to the recommendations given at # https://developmentseed.org/titiler/advanced/performance_tuning/ -GDAL_HTTP_DEFAULTS = dict( +class GDALHTTPOptions(BaseSettings): # this will be set later on depending on the opened file - CPL_VSIL_CURL_ALLOWED_EXTENSIONS="", + CPL_VSIL_CURL_ALLOWED_EXTENSIONS: str = "" # 200MB - CPL_VSIL_CURL_CACHE_SIZE=200_000_000, + CPL_VSIL_CURL_CACHE_SIZE: int = 200_000_000 # alternative: ARRAY - GDAL_BAND_BLOCK_CACHE="HASHSET", + GDAL_BAND_BLOCK_CACHE: str = "HASHSET" # # 200MB # GDAL_CACHEMAX=200, --> activating this seems to let the tests stall at some point # don't make LIST request - GDAL_DISABLE_READDIR_ON_OPEN="EMPTY_DIR", - GDAL_HTTP_TIMEOUT=30, - GDAL_HTTP_MAX_RETRY=3, - GDAL_HTTP_MERGE_CONSECUTIVE_RANGES=True, - GDAL_HTTP_MULTIPLEX=True, - GDAL_HTTP_RETRY_DELAY=5, - GDAL_HTTP_VERSION=2, + GDAL_DISABLE_READDIR_ON_OPEN: str = "EMPTY_DIR" + GDAL_HTTP_TIMEOUT: int = 30 + GDAL_HTTP_MAX_RETRY: int = 3 + GDAL_HTTP_MERGE_CONSECUTIVE_RANGES: bool = True + GDAL_HTTP_MULTIPLEX: bool = True + GDAL_HTTP_RETRY_DELAY: int = 5 + GDAL_HTTP_VERSION: int = 2 # let GDAL cache internally - VSI_CACHE=True, + VSI_CACHE: bool = True # 5MB cache per file - VSI_CACHE_SIZE=5_000_000, -) -GDAL_HTTP_OPTS = _merge_gdal_defaults_with_env() -MAPCHETE_IO_RETRY_SETTINGS = { - "tries": int(os.environ.get("MAPCHETE_IO_RETRY_TRIES", "3")), - "delay": float(os.environ.get("MAPCHETE_IO_RETRY_DELAY", "1")), - "backoff": float(os.environ.get("MAPCHETE_IO_RETRY_BACKOFF", "1")), -} + VSI_CACHE_SIZE: int = 5_000_000 + model_config = SettingsConfigDict() + + +class IORetrySettings(BaseSettings): + tries: int = 3 + delay: float = 1.0 + backoff: float = 1.0 + model_config = SettingsConfigDict(env_prefix="MAPCHETE_IO_RETRY_") diff --git a/mapchete/io/vector.py b/mapchete/io/vector.py index c50cb375..e4885d1a 100644 --- a/mapchete/io/vector.py +++ b/mapchete/io/vector.py @@ -26,7 +26,7 @@ segmentize_geometry, to_shape, ) -from mapchete.io.settings import MAPCHETE_IO_RETRY_SETTINGS +from mapchete.io.settings import IORetrySettings from mapchete.path import MPath, fs_from_path, path_exists from mapchete.types import Bounds from mapchete.validate import validate_bounds @@ -324,7 +324,7 @@ def __exit__(self, *args): @retry( logger=logger, exceptions=(DriverError, FionaError, FionaValueError), - **MAPCHETE_IO_RETRY_SETTINGS, + **dict(IORetrySettings()), ) def _get_reprojected_features( inp=None, diff --git a/mapchete/path.py b/mapchete/path.py index a8b41373..3a074bfe 100644 --- a/mapchete/path.py +++ b/mapchete/path.py @@ -15,7 +15,7 @@ from rasterio.session import Session as RioSession from mapchete._executor import Executor -from mapchete.io.settings import GDAL_HTTP_OPTS +from mapchete.io.settings import GDALHTTPOptions from mapchete.tile import BufferedTile logger = logging.getLogger(__name__) @@ -380,7 +380,7 @@ def gdal_env_params( # for remote paths, we need some special settings if self.is_remote(): - gdal_opts = GDAL_HTTP_OPTS.copy() + gdal_opts = dict(GDALHTTPOptions()) # we cannot know at this point which file types the VRT or STACTA JSON # is pointing to, so in order to play safe, we remove the extensions constraint here diff --git a/pyproject.toml b/pyproject.toml index 73dd1c57..52ffab56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "numpy>=1.16", "oyaml", "pydantic>=2.3.0", + "pydantic_settings>=2.0.0", "pyproj", "python-dateutil", "rasterio>1.2.10", diff --git a/requirements.txt b/requirements.txt index a3145290..a4d37701 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ numpy>=1.16 oyaml>=0.9 pyproj pydantic>=2.3.0 +pydantic_settings>=2.0.0 retry>=0.9.2 rasterio>1.2.10 s3fs<2023.9.0