Skip to content

Commit

Permalink
Merge pull request #36 from NASA-IMPACT/fix/combined-additional-attri…
Browse files Browse the repository at this point in the history
…butes

Handle combined additional attributes
  • Loading branch information
sharkinsspatial authored Aug 27, 2024
2 parents 6649582 + 5f4f7dd commit a6c6d28
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 4 deletions.
39 changes: 38 additions & 1 deletion hls_vi/generate_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from datetime import datetime, timezone
from pathlib import Path
from typing import Tuple
from typing import List, Optional, Tuple

import rasterio
from lxml import etree as ET
Expand Down Expand Up @@ -112,6 +112,7 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:
if "L30" in metadata_path.name
else "10.5067/HLS/HLSS30_VI.002",
)
normalize_additional_attributes(tree.find("AdditionalAttributes"))

data_granule = tree.find("DataGranule")
data_granule.remove(data_granule.find("DataGranuleSizeInBytes"))
Expand All @@ -132,6 +133,42 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:
)


def normalize_additional_attributes(container: ElementBase) -> None:
    """Normalize additional attribute values in place.

    On rare occasions, granule data is split and recombined upstream.  When this
    occurs, the associated metadata is also split and recombined, resulting in
    values for additional attributes that are created by joining the separate
    parts with the string `" + "`.

    For example, the PROCESSING_BASELINE value of the HLS metadata resulting from
    this scenario might be `05.11 + 05.11` instead of simply `05.11`.  When the
    CMR contains data type constraints on these additional attribute values, such
    values can cause CMR to reject the metadata.  Continuing this example, when
    PROCESSING_BASELINE is constrained to `float` values, the string
    `05.11 + 05.11` will fail `float` parsing and the CMR will raise an error.

    Therefore, we must "normalize" such additional attribute values by simply
    splitting around the `" + "` and (arbitrarily) using the first part as the
    value of the additional attribute.

    Every `Value` element found beneath each `AdditionalAttribute` child of
    `container` is normalized (not only the first one), and surrounding
    whitespace is stripped from the result.  `Value` elements that have no text
    at all (e.g., `<Value/>`) are left untouched.

    Args:
        container: Element whose direct `AdditionalAttribute` children are to be
            normalized.  The element tree is modified in place.
    """
    attrs: List[ElementBase] = container.findall("./AdditionalAttribute", None)

    for attr in attrs:
        value_elements: List[ElementBase] = attr.findall(".//Value", None)

        for value_element in value_elements:
            value_text: Optional[str] = value_element.text

            if value_text is None:
                # An empty element has no text to split; calling `.split` on
                # None would raise, and there is nothing to normalize anyway.
                continue

            # Replace the text with the first part obtained by splitting on
            # " + ".  If the text does not contain " + ", it is unchanged apart
            # from whitespace stripping: "05.11".split(" + ") is simply
            # ["05.11"], so taking the first element produces "05.11".
            normalized = value_text.split(" + ", 1)[0].strip()
            value_element.text = (
                normalized  # pyright: ignore[reportAttributeAccessIssue]
            )


def set_additional_attribute(attrs: ElementBase, name: str, value: str) -> None:
attr = attrs.find(f'./AdditionalAttribute[Name="{name}"]', None)

Expand Down
1 change: 1 addition & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[mypy]
files = hls_vi
strict = True
pretty = True
show_error_codes = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@
<AdditionalAttribute>
<Name>PROCESSING_BASELINE</Name>
<Values>
<Value>05.10</Value>
<Value>05.11</Value>
</Values>
</AdditionalAttribute>
<AdditionalAttribute>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@
<AdditionalAttribute>
<Name>PROCESSING_BASELINE</Name>
<Values>
<Value>05.10</Value>
<Value>05.11 + 05.11</Value>
</Values>
</AdditionalAttribute>
<AdditionalAttribute>
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ extras =
test
commands =
flake8
mypy hls_vi
mypy
pytest -vv --doctest-modules

0 comments on commit a6c6d28

Please sign in to comment.