Skip to content

Commit

Permalink
Strip whitespace from datetime metadata fields
Browse files Browse the repository at this point in the history
Fixes #29
  • Loading branch information
chuckwondo committed Aug 1, 2024
1 parent c3e69aa commit 7fd356f
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 14 deletions.
2 changes: 1 addition & 1 deletion hls_vi/generate_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def generate_metadata(input_dir: Path, output_dir: Path):
tree = ET.parse(metadata_path)

with rasterio.open(next(output_dir.glob("*.tif"))) as vi_tif:
sensing_times = vi_tif.tags()["SENSING_TIME"].split(";")
sensing_times = [t.strip() for t in vi_tif.tags()["SENSING_TIME"].split(";")]
sensing_time_begin, sensing_time_end = sensing_times[0], sensing_times[-1]
processing_time = vi_tif.tags()["HLS_VI_PROCESSING_TIME"]

Expand Down
52 changes: 39 additions & 13 deletions tests/test_vi.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import contextlib
import io
import json
import re

import pytest
import rasterio
Expand All @@ -16,6 +17,8 @@
)
from hls_vi.generate_stac_items import create_item

ISO_8601_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"


def find_index_by_long_name(long_name: str) -> Index:
for index in Index:
Expand Down Expand Up @@ -53,9 +56,9 @@ def assert_tifs_equal(granule: Granule, actual: Path, expected: Path):
assert actual_time_str is not None
assert expected_time_str is not None

actual_time = datetime.strptime(actual_time_str, "%Y-%m-%dT%H:%M:%S.%fZ")
actual_time = datetime.strptime(actual_time_str, ISO_8601_DATETIME_FORMAT)
expected_time = datetime.strptime(
expected_time_str, "%Y-%m-%dT%H:%M:%S.%fZ"
expected_time_str, ISO_8601_DATETIME_FORMAT
)

# The actual time should be greater than the expected time because
Expand All @@ -71,7 +74,7 @@ def remove_item(
return {k: v for k, v in mapping.items() if k != key}, mapping.get(key)


def remove_element(root: ET.Element, path: str) -> None:
def remove_element(root: ET.Element, path: str) -> ET.Element:
parent_path = "/".join(path.split("/")[:-1])
parent = root.find(parent_path)
child = root.find(path)
Expand All @@ -81,17 +84,27 @@ def remove_element(root: ET.Element, path: str) -> None:

parent.remove(child)

return child

def remove_datetime_elements(tree: ET.ElementTree) -> ET.ElementTree:
root = tree.getroot()

remove_element(root, "./InsertTime")
remove_element(root, "./LastUpdate")
remove_element(root, "./DataGranule/ProductionDateTime")
remove_element(root, "./Temporal/RangeDateTime/BeginningDateTime")
remove_element(root, "./Temporal/RangeDateTime/EndingDateTime")
def remove_datetime_elements(
tree: ET.ElementTree,
) -> Tuple[ET.ElementTree, Tuple[ET.Element, ...]]:
root = tree.getroot()

return tree
return (
tree,
tuple(
remove_element(root, path)
for path in (
"./InsertTime",
"./LastUpdate",
"./DataGranule/ProductionDateTime",
"./Temporal/RangeDateTime/BeginningDateTime",
"./Temporal/RangeDateTime/EndingDateTime",
)
),
)


def assert_indices_equal(granule: Granule, actual_dir: Path, expected_dir: Path):
Expand All @@ -106,6 +119,16 @@ def assert_indices_equal(granule: Granule, actual_dir: Path, expected_dir: Path)
assert_tifs_equal(granule, actual_tif_path, expected_tif_path)


def is_valid_datetime(e: ET.Element) -> bool:
    """Return whether the element's text is a well-formed UTC datetime string.

    The text must consist *entirely* of an ISO 8601 datetime of the form
    ``YYYY-MM-DDTHH:MM:SS[.fraction]Z`` where the optional fraction has
    1 to 9 digits.  Any leading or trailing characters (including
    whitespace or a newline) make the value invalid.  An element with no
    text (``e.text is None``) is invalid.
    """
    # The CMR accepts ISO 8601 datetime values, optionally with fractional seconds
    # with 1 to 9 decimal digits. We are using a regex because Python's strptime
    # function accepts at most 6 fractional digits (%f), but some of the tif tag
    # values include more than 6 decimal places.
    #
    # Use fullmatch rather than match: match only anchors at the start of the
    # string, so a value with trailing whitespace or other trailing characters
    # would otherwise be reported as valid.
    return bool(
        re.fullmatch(
            r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}([.]\d{1,9})?Z", str(e.text)
        )
    )


@pytest.mark.parametrize(
argnames="input_dir,id_str",
argvalues=[
Expand Down Expand Up @@ -152,8 +175,11 @@ def test_generate_cmr_metadata(input_dir, output_dir):
try:
generate_metadata(input_dir=input_path, output_dir=output_path)

actual_metadata_tree = remove_datetime_elements(ET.parse(actual_metadata_path))
expected_metadata_tree = remove_datetime_elements(
actual_metadata_tree, dt_elements = remove_datetime_elements(
ET.parse(actual_metadata_path)
)
assert all(map(is_valid_datetime, dt_elements))
expected_metadata_tree, _ = remove_datetime_elements(
ET.parse(expected_metadata_path)
)

Expand Down

0 comments on commit 7fd356f

Please sign in to comment.