Skip to content

Commit

Permalink
fix: address masking issues identified in #44 (#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
ceholden authored Dec 17, 2024
1 parent dfdddc4 commit 2856231
Show file tree
Hide file tree
Showing 20 changed files with 200 additions and 7 deletions.
57 changes: 51 additions & 6 deletions hls_vi/generate_indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ def read_granule_bands(input_dir: Path, id_str: str) -> Granule:
fmask = tif.read(1, masked=False)

tifnames = [f"{id_}.{band.name}.tif" for band in id_.instrument.bands]
data = [apply_fmask(read_band(input_dir / tifname), fmask) for tifname in tifnames]
data = apply_union_of_masks(
[apply_fmask(read_band(input_dir / tifname), fmask) for tifname in tifnames]
)
harmonized_bands = [band.value for band in id_.instrument.bands]

# Every band has the same CRS, transform, and tags, so we can use the first one to
Expand All @@ -180,8 +182,13 @@ def read_band(tif_path: Path) -> np.ma.masked_array:
with rasterio.open(tif_path) as tif:
data = tif.read(1, masked=True, fill_value=-9999) / 10_000

# Clamp surface reflectance values to the range [0, 1].
return np.ma.masked_outside(data, 0, 1)
# Clamp surface reflectance values to the range [0.0001, ∞]
# * We consider 0% reflectance to be invalid data
# * We want to retain values >100% reflectance. This is a known issue with
# atmospheric compensation where it's possible to have values >100% reflectance
# due to unmet assumptions about topography.
# See, https://github.com/NASA-IMPACT/hls-vi/issues/44#issuecomment-2520592212
return np.ma.masked_less_equal(data, 0)


def apply_fmask(data: np.ndarray, fmask: np.ndarray) -> np.ma.masked_array:
Expand All @@ -192,6 +199,29 @@ def apply_fmask(data: np.ndarray, fmask: np.ndarray) -> np.ma.masked_array:
return np.ma.masked_array(data, fmask & cloud_like != 0)


def apply_union_of_masks(bands: List[np.ma.masked_array]) -> List[np.ma.masked_array]:
"""Mask all bands according to valid data across all bands
This is intended to reduce noise by masking spectral indices if
any reflectance band is outside of expected range of values ([1, 10000]).
For example the NBR index only looks at NIR and SWIR bands, but we might have
negative reflectance in visible bands that indicate the retrieval has issues
and should not be used.
Reference: https://github.com/NASA-IMPACT/hls-vi/issues/44
"""
# NB - numpy masked arrays "true" is a masked value, "false" is unmasked
# so bitwise "or" will mask if "any" band has a masked value for that pixel
mask = np.ma.nomask
for band in bands:
mask = np.ma.mask_or(mask, band.mask)

for band in bands:
band.mask = mask

return bands


def select_tags(granule_id: GranuleId, tags: Tags) -> Tags:
"""
Selects tags from the input tags that are relevant to the HLS VI product.
Expand Down Expand Up @@ -250,11 +280,11 @@ def write_granule_index(
dtype=data.dtype,
crs=granule.crs,
transform=granule.transform,
nodata=data.fill_value,
nodata=index.fill_value,
) as dst:
dst.offsets = (0.0,)
dst.scales = (index.scale_factor,)
dst.write(data.filled(), 1)
dst.write(data.filled(fill_value=index.fill_value), 1)
dst.update_tags(
**granule.tags,
long_name=index.long_name,
Expand Down Expand Up @@ -329,7 +359,12 @@ class Index(Enum):
SAVI = ("Soil-Adjusted Vegetation Index",)
TVI = ("Triangular Vegetation Index", 0.01)

def __init__(self, long_name: str, scale_factor: SupportsFloat = 0.0001) -> None:
def __init__(
self,
long_name: str,
scale_factor: SupportsFloat = 0.0001,
fill_value: int = -19_999,
) -> None:
function_name = self.name.lower()
index_function: Optional[IndexFunction] = globals().get(function_name)

Expand All @@ -339,9 +374,19 @@ def __init__(self, long_name: str, scale_factor: SupportsFloat = 0.0001) -> None
self.long_name = long_name
self.compute_index = index_function
self.scale_factor = float(scale_factor)
self.fill_value = fill_value

def __call__(self, data: BandData) -> np.ma.masked_array:
scaled_index = self.compute_index(data) / self.scale_factor
scaled_index.fill_value = self.fill_value

# Before converting to int16 we want to clamp the values of our float to
# the min/max bounds of an int16 to prevent values wrapping around
int16_info = np.iinfo("int16")
scaled_index = np.ma.clip(
scaled_index, a_min=int16_info.min, a_max=int16_info.max
)

# We need to round to whole numbers (i.e., 0 decimal places, which is
# the default for np.round) because we convert to integer values, but
# numpy's conversion to integer types performs truncation, not rounding.
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
150 changes: 149 additions & 1 deletion tests/test_vi.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
from datetime import datetime
from pathlib import Path
from typing import Mapping, Optional, Tuple
from typing import Dict, List, Mapping, Optional, Tuple
from xml.etree import ElementTree as ET
import contextlib
import io
import json

import numpy as np
import pytest
import rasterio
from hls_vi.generate_metadata import generate_metadata
from hls_vi.generate_indices import (
Band,
Granule,
GranuleId,
Index,
apply_union_of_masks,
generate_vi_granule,
read_granule_bands,
)
from hls_vi.generate_stac_items import create_item

Expand Down Expand Up @@ -101,6 +106,149 @@ def remove_datetime_elements(tree: ET.ElementTree) -> ET.ElementTree:
return tree


def test_apply_union_of_masks():
bands = [
np.ma.array(
np.array([5, 4, 3, 2]),
mask=np.array([True, False, False, True]),
),
np.ma.array(
np.array([2, 3, 4, 5]),
mask=np.array([False, True, False, True]),
),
]
masked = apply_union_of_masks(bands)
# same masks for all
np.testing.assert_array_equal(masked[0].mask, masked[1].mask)
# test mask logic
np.testing.assert_array_equal(masked[0].mask, np.array([True, True, False, True]))


def create_fake_granule_data(
dest: Path, granule_id: str, sr: Dict[Band, int], fmask: int
):
"""Generate fake granule data for a single pixel"""
granule = GranuleId.from_string(granule_id)

profile = {
"height": 1,
"width": 1,
"count": 1,
"driver": "GTiff",
}
for band, value in sr.items():
band_id = granule.instrument.value[0](band).name
with rasterio.open(
dest / f"{granule_id}.{band_id}.tif",
"w",
dtype="int16",
nodata=-9999,
**profile,
) as dst:
data = np.array([[[value]]], dtype=np.int16)
dst.write(data)
dst.scales = (1 / 10_000,)

with rasterio.open(
dest / f"{granule_id}.Fmask.tif", "w", dtype="uint8", **profile
) as dst:
dst.write(np.array([[[fmask]]], dtype=np.uint8))


@pytest.mark.parametrize(
["reflectances", "fmask", "masked"],
[
pytest.param([42] * 6, int("00000000", 2), False, id="all clear"),
pytest.param([-9999] * 6, int("00000000", 2), True, id="input is nodata"),
pytest.param([-1] * 6, int("00000000", 2), True, id="all negative"),
pytest.param(
[-1, 42, 42, 42, 42, 42],
int("00000000", 2),
True,
id="one negative",
),
pytest.param(
[0, 42, 42, 42, 42, 42],
int("00000000", 2),
True,
id="zero reflectance",
),
pytest.param(
[10_001] * 6,
int("00000000", 2),
False,
id="above 100% reflectance",
),
pytest.param(
[42] * 6,
int("00000010", 2),
True,
id="cloudy",
),
pytest.param(
[42] * 6,
int("00000100", 2),
True,
id="cloud shadow",
),
pytest.param(
[42] * 6,
int("00001000", 2),
True,
id="adjacent to cloud / shadow",
),
pytest.param(
[42] * 6,
int("00001110", 2),
True,
id="cloud, cloud shadow, adjacent to cloud / shadow",
),
pytest.param(
[42] * 6,
int("11000000", 2),
False,
id="high aerosol not masked",
),
],
)
def test_granule_bands_masking(
tmp_path: pytest.TempPathFactory,
reflectances: List[int],
fmask: int,
masked: bool,
):
"""Test masking criteria based on rules,
1. Mask masked data values from input (-9999)
2. Mask <= 0% surface reflectance
3. Do not mask >100% reflectance
4. Apply Fmask when bits are set for,
i) cloud shadow
ii) adjacent to cloud shadow
iii) cloud
5. A mask pixel in _any_ band should mask should mask the same pixel in _all_
bands. This ensures the VI outputs from any combination of reflectance bands
will be masked.
"""
granule_id = "HLS.S30.T01GEL.2024288T213749.v2.0"
granule_data = dict(zip(Band, reflectances))
create_fake_granule_data(tmp_path, granule_id, granule_data, fmask)
granule = read_granule_bands(tmp_path, granule_id)

for reflectance, band in zip(reflectances, Band):
test_masked = granule.data[band].mask[0][0]
assert test_masked is np.bool_(masked)

test_value = granule.data[band].data[0][0]
# expected value will not be scaled if it's nodata value
if reflectance == -9999:
expected_value = -9999
else:
expected_value = np.round(reflectance / 10_000, 4)

assert test_value == expected_value


def assert_indices_equal(granule: Granule, actual_dir: Path, expected_dir: Path):
actual_tif_paths = sorted(actual_dir.glob("*.tif"))
actual_tif_names = [path.name for path in actual_tif_paths]
Expand Down

0 comments on commit 2856231

Please sign in to comment.