Skip to content

Commit

Permalink
FIX: Fix the ability to save COGs with any dtype with Dask, with the …
Browse files Browse the repository at this point in the history
…workaround described here (don't compute statistics for problematic dtypes)
  • Loading branch information
remi-braun committed Dec 3, 2024
1 parent 9a1511e commit 10e7da7
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 12 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Release History

## 1.43.5 (2024-mm-dd)

- FIX: Fix the ability to save COGs with any dtype with Dask, with the workaround described [here](https://github.com/opendatacube/odc-geo/issues/189#issuecomment-2513450481) (don't compute statistics for problematic dtypes)

## 1.43.4 (2024-11-28)

- FIX: Fix regression in `files.read_json` due to Python 3.11, where `datetime.fromisoformat` parses more than the output of `isoformat()`
Expand Down
24 changes: 13 additions & 11 deletions CI/SCRIPTS/test_rasters.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
from CI.SCRIPTS.script_utils import KAPUT_KWARGS, dask_env, rasters_path, s3_env
from sertit import ci, path, rasters, vectors
from sertit.rasters import (
FLOAT_NODATA,
INT8_NODATA,
UINT8_NODATA,
UINT16_NODATA,
any_raster_to_xr_ds,
get_nodata_value,
get_nodata_value_from_dtype,
Expand Down Expand Up @@ -491,14 +495,14 @@ def _test_raster_after_write(test_path, dtype, nodata_val):
@pytest.mark.parametrize(
("dtype", "nodata_val"),
[
pytest.param(np.uint8, 255),
pytest.param(np.int8, -128),
pytest.param(np.uint16, 65535),
pytest.param(np.int16, -9999),
pytest.param(np.uint32, 65535),
pytest.param(np.int32, 65535),
pytest.param(np.float32, -9999),
pytest.param(np.float64, -9999),
pytest.param(np.uint8, UINT8_NODATA),
pytest.param(np.int8, INT8_NODATA),
pytest.param(np.uint16, UINT16_NODATA),
pytest.param(np.int16, FLOAT_NODATA),
pytest.param(np.uint32, UINT16_NODATA),
pytest.param(np.int32, UINT16_NODATA),
pytest.param(np.float32, FLOAT_NODATA),
pytest.param(np.float64, FLOAT_NODATA),
],
)
def test_write(dtype, nodata_val, tmp_path):
Expand All @@ -522,9 +526,7 @@ def test_write(dtype, nodata_val, tmp_path):

# -------------------------------------------------------------------------------------------------
# Test COGs
# Remove some problematic (for now) dtypes
# https://github.com/numpy/numpy/issues/25677#issuecomment-2236081970
if dtype in [np.float32, np.float64, np.uint32, np.int32]:
if dtype not in [np.int8]:
rasters.write(
raster_xds,
test_cog_path,
Expand Down
9 changes: 8 additions & 1 deletion sertit/rasters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ def write(
kwargs["compress"].lower() in ["lzw", "deflate", "zstd"]
and "predictor" not in kwargs # noqa: W503
):
if xds.encoding["dtype"] in [np.float32, np.float64, float]:
if xds.encoding["dtype"] in [np.float16, np.float32, np.float64, float]:
kwargs["predictor"] = "3"
else:
kwargs["predictor"] = "2"
Expand All @@ -1183,11 +1183,18 @@ def write(
from odc.geo import cog, xr # noqa

LOGGER.debug("Writing your COG with Dask!")

# Skip computing statistics for dtypes narrower than 4 bytes (problematic for now): computing them needs the ability to cast 999999 into the dtype
# OverflowError: Python integer 999999 out of bounds for xxx
# https://github.com/opendatacube/odc-geo/issues/189#issuecomment-2513450481
compute_stats = np.dtype(dtype).itemsize >= 4

cog.save_cog_with_dask(
xds.copy(data=xds.fillna(nodata).astype(dtype)).rio.set_nodata(
nodata
),
str(path),
stats=compute_stats,
).compute()
is_written = True

Expand Down

0 comments on commit 10e7da7

Please sign in to comment.