Skip to content

Commit

Permalink
Validate CMR XML using XML schema
Browse files Browse the repository at this point in the history
  • Loading branch information
chuckwondo committed Aug 1, 2024
1 parent 7fd356f commit f27d53d
Show file tree
Hide file tree
Showing 12 changed files with 2,951 additions and 78 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
!hls_vi/
!tests/
tests/fixtures/
!mypy.ini
!setup.py
!tox.ini
Empty file added hls_vi/__init__.py
Empty file.
34 changes: 18 additions & 16 deletions hls_vi/generate_indices.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# pyright: reportAttributeAccessIssue=false, reportOperatorIssue=false
# pyright: reportAttributeAccessIssue=false
# pyright: reportOperatorIssue=false
# pyright: reportOptionalOperand=false

import getopt
import os
Expand All @@ -8,7 +10,7 @@
from datetime import datetime, timezone
from enum import Enum, unique
from pathlib import Path
from typing import Callable, Mapping, Optional, SupportsFloat, Tuple, Type
from typing import Callable, List, Mapping, Optional, SupportsFloat, Tuple, Type
from typing_extensions import TypeAlias

import numpy as np
Expand Down Expand Up @@ -91,7 +93,7 @@ class Instrument(Enum):
def __init__(
self, band_type: Type[InstrumentBand], parse_satellite: Callable[[Tags], str]
) -> None:
self.bands = list(band_type)
self.bands: List[InstrumentBand] = list(band_type)
self.parse_satellite = parse_satellite

@classmethod
Expand Down Expand Up @@ -207,7 +209,7 @@ def select_tags(granule_id: GranuleId, tags: Tags) -> Tags:
}


def write_granule_indices(output_dir: Path, granule: Granule):
def write_granule_indices(output_dir: Path, granule: Granule) -> None:
os.makedirs(output_dir, exist_ok=True)
processing_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

Expand All @@ -232,7 +234,7 @@ def write_granule_index(
granule: Granule,
index: "Index",
processing_time: str,
):
) -> None:
"""Save raster data to a GeoTIFF file using rasterio."""

data = index(granule.data)
Expand Down Expand Up @@ -264,16 +266,16 @@ def write_granule_index(

def evi(data: BandData) -> np.ma.masked_array:
b, r, nir = data[Band.B], data[Band.R], data[Band.NIR]
return 2.5 * (nir - r) / (nir + 6 * r - 7.5 * b + 1) # type: ignore
return 2.5 * (nir - r) / (nir + 6 * r - 7.5 * b + 1)


def msavi(data: BandData) -> np.ma.masked_array:
r, nir = data[Band.R], data[Band.NIR]
sqrt_term = (2 * nir + 1) ** 2 - 8 * (nir - r) # type: ignore
sqrt_term = (2 * nir + 1) ** 2 - 8 * (nir - r)

result: np.ma.masked_array = np.ma.where(
sqrt_term >= 0,
(2 * nir + 1 - np.sqrt(sqrt_term)) / 2, # type: ignore
(2 * nir + 1 - np.sqrt(sqrt_term)) / 2,
np.nan,
)
result.fill_value = r.fill_value
Expand All @@ -283,38 +285,38 @@ def msavi(data: BandData) -> np.ma.masked_array:

def nbr(data: BandData) -> np.ma.masked_array:
nir, swir2 = data[Band.NIR], data[Band.SWIR2]
return (nir - swir2) / (nir + swir2) # type: ignore
return (nir - swir2) / (nir + swir2)


def nbr2(data: BandData) -> np.ma.masked_array:
swir1, swir2 = data[Band.SWIR1], data[Band.SWIR2]
return (swir1 - swir2) / (swir1 + swir2) # type: ignore
return (swir1 - swir2) / (swir1 + swir2)


def ndmi(data: BandData) -> np.ma.masked_array:
nir, swir1 = data[Band.NIR], data[Band.SWIR1]
return (nir - swir1) / (nir + swir1) # type: ignore
return (nir - swir1) / (nir + swir1)


def ndvi(data: BandData) -> np.ma.masked_array:
r, nir = data[Band.R], data[Band.NIR]
return (nir - r) / (nir + r) # type: ignore
return (nir - r) / (nir + r)


def ndwi(data: BandData) -> np.ma.masked_array:
g, nir = data[Band.G], data[Band.NIR]
return (g - nir) / (g + nir) # type: ignore
return (g - nir) / (g + nir)


def savi(data: BandData) -> np.ma.masked_array:
r, nir = data[Band.R], data[Band.NIR]
return 1.5 * (nir - r) / (nir + r + 0.5) # type: ignore
return 1.5 * (nir - r) / (nir + r + 0.5)


def tvi(data: BandData) -> np.ma.masked_array:
g, r, nir = data[Band.G], data[Band.R], data[Band.NIR]
# We do NOT multiply by 10_000 like we do for other indices.
return (120 * (nir - g) - 200 * (r - g)) / 2 # type: ignore
return (120 * (nir - g) - 200 * (r - g)) / 2 # pyright: ignore[reportReturnType]


class Index(Enum):
Expand Down Expand Up @@ -386,7 +388,7 @@ def generate_vi_granule(input_dir: Path, output_dir: Path, id_str: str) -> Granu
return granule


def main():
def main() -> None:
input_dir, output_dir, id_str = parse_args()
generate_vi_granule(input_dir, output_dir, id_str)

Expand Down
26 changes: 16 additions & 10 deletions hls_vi/generate_metadata.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import getopt
import importlib_resources
import os
import sys
import xml.etree.ElementTree as ET

from datetime import datetime, timezone
from pathlib import Path
from typing import Tuple

import rasterio
from lxml import etree as ET
from lxml.etree import Element, ElementBase


def generate_metadata(input_dir: Path, output_dir: Path):
def generate_metadata(input_dir: Path, output_dir: Path) -> None:
"""
Create CMR XML metadata file for an HLS VI granule.
Expand All @@ -23,7 +26,7 @@ def generate_metadata(input_dir: Path, output_dir: Path):
to this directory with the name `HLS-VI.*.cmr.xml`.
"""
metadata_path = next(input_dir.glob("HLS.*.cmr.xml"))
tree = ET.parse(metadata_path)
tree = ET.parse(metadata_path, None)

with rasterio.open(next(output_dir.glob("*.tif"))) as vi_tif:
sensing_times = [t.strip() for t in vi_tif.tags()["SENSING_TIME"].split(";")]
Expand Down Expand Up @@ -61,24 +64,27 @@ def generate_metadata(input_dir: Path, output_dir: Path):
tree.find("Temporal/RangeDateTime/BeginningDateTime").text = sensing_time_begin
tree.find("Temporal/RangeDateTime/EndingDateTime").text = sensing_time_end

with (importlib_resources.files("hls_vi") / "schema" / "Granule.xsd").open() as xsd:
ET.XMLSchema(file=xsd).assertValid(tree)

tree.write(
output_dir / metadata_path.name.replace("HLS", "HLS-VI"),
encoding="utf-8",
xml_declaration=True,
)


def set_additional_attribute(attrs: ET.Element, name: str, value: str):
attr = attrs.find(f'./AdditionalAttribute[Name="{name}"]')
def set_additional_attribute(attrs: ElementBase, name: str, value: str) -> None:
attr = attrs.find(f'./AdditionalAttribute[Name="{name}"]', None)

if attr is not None:
attr.find(".//Value").text = value
else:
attr = ET.Element("AdditionalAttribute")
attr_name = ET.Element("Name")
attr = Element("AdditionalAttribute", None, None)
attr_name = Element("Name", None, None)
attr_name.text = name
attr_values = ET.Element("Values")
attr_value = ET.Element("Value")
attr_values = Element("Values", None, None)
attr_value = Element("Value", None, None)
attr_value.text = value
attr_values.append(attr_value)
attr.append(attr_name)
Expand Down Expand Up @@ -115,7 +121,7 @@ def parse_args() -> Tuple[Path, Path]:
return Path(input_dir), Path(output_dir)


def main():
def main() -> None:
input_dir, output_dir = parse_args()
generate_metadata(input_dir=input_dir, output_dir=output_dir)

Expand Down
37 changes: 21 additions & 16 deletions hls_vi/generate_stac_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import json
import argparse
from typing import Any, Mapping
import pystac
import rasterio
import untangle
Expand Down Expand Up @@ -80,7 +81,7 @@
}


def get_geometry(granule):
def get_geometry(granule: untangle.Element) -> MultiPolygon:
"""Function returns the geometry of the HLS_VI granule
Args:
Expand Down Expand Up @@ -109,7 +110,7 @@ def get_geometry(granule):
return MultiPolygon(multipolygon)


def process_common_metadata(item, granule):
def process_common_metadata(item: pystac.Item, granule: untangle.Element) -> None:
"""Function fetches and processes the general information from the granule metadata.
and updates the information in the STAC item.
Expand All @@ -133,14 +134,11 @@ def process_common_metadata(item, granule):
)
# For L30, the instrument is "OLI", but for S30, it is "Sentinel-2 MSI", we simply
# split on spaces and grab the last element, so we get either "OLI" or "MSI".
if " " in instrument:
item_instrument = instrument.split()[1]
else:
item_instrument = instrument
item_instrument = instrument.split()[1] if " " in instrument else instrument
item.common_metadata.instruments = [item_instrument]


def process_eo(item, granule):
def process_eo(item: pystac.Item, granule: untangle.Element) -> None:
"""Function processes the Earth observation information from the STAC item.
Args:
Expand All @@ -153,7 +151,9 @@ def process_eo(item, granule):
eo_extension.cloud_cover = float(attribute.Values.Value.cdata)


def add_assets(item, granule, endpoint, version):
def add_assets(
item: pystac.Item, granule: untangle.Element, endpoint: str, version: str
) -> None:
"""Function adds all the assets to the STAC item
Args:
Expand Down Expand Up @@ -191,7 +191,9 @@ def add_assets(item, granule, endpoint, version):
item.set_self_href(f"{public_url}{item_id}_stac.json")


def process_projection(item, granule, index_file):
def process_projection(
item: pystac.Item, granule: untangle.Element, index_file: str
) -> None:
"""Function fetches the projection information from the HLS_VI band file and
compares if the projection is same for the granule as well as the HLS_VI band image.
Expand All @@ -210,7 +212,7 @@ def process_projection(item, granule, index_file):
proj_ext.epsg = int(f"326{mgrs_tile_id[:2]}")


def process_view_geometry(item, granule):
def process_view_geometry(item: pystac.Item, granule: untangle.Element) -> None:
"""Function checks the geometry within the attributes of the STAC item and
the HLS_VI granule
Expand All @@ -222,11 +224,11 @@ def process_view_geometry(item, granule):
for attribute in granule.AdditionalAttributes.AdditionalAttribute:
if attribute.Name == "MEAN_SUN_AZIMUTH_ANGLE":
view_extension.sun_azimuth = float(attribute.Values.Value.cdata)
if attribute.Name == "MEAN_VIEW_AZIMUTH_ANGLE":
elif attribute.Name == "MEAN_VIEW_AZIMUTH_ANGLE":
view_extension.azimuth = float(attribute.Values.Value.cdata)


def process_scientific(item, granule):
def process_scientific(item: pystac.Item, granule: untangle.Element) -> None:
"""Function checks the attribute value in STAC item and the granule.
Args:
Expand All @@ -239,7 +241,7 @@ def process_scientific(item, granule):
scientific_extension.doi = attribute.Values.Value.cdata


def cmr_to_item(hls_vi_metadata, endpoint, version):
def cmr_to_item(hls_vi_metadata: str, endpoint: str, version: str) -> Mapping[str, Any]:
"""Function creates a pystac item from the CMR XML file provided as an input
Args:
Expand Down Expand Up @@ -280,10 +282,13 @@ def cmr_to_item(hls_vi_metadata, endpoint, version):
process_projection(item, granule, index_file)
process_view_geometry(item, granule)
process_scientific(item, granule)
return item.to_dict()

return item.to_dict() # type: ignore

def create_item(hls_vi_metadata, out_json, endpoint, version):

def create_item(
hls_vi_metadata: str, out_json: str, endpoint: str, version: str
) -> None:
"""Function acts as an endpoint to create a STAC item for the HLS_VI granule.
Args:
Expand All @@ -298,7 +303,7 @@ def create_item(hls_vi_metadata, out_json, endpoint, version):
json.dump(item, outfile)


def main():
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument(
"--cmr_xml",
Expand Down
Empty file added hls_vi/py.typed
Empty file.
Loading

0 comments on commit f27d53d

Please sign in to comment.