Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Update metadata to include HLS granule ID and links to Fmask layer #48

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion hls_vi/generate_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,13 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:
processing_time = tags["HLS_VI_PROCESSING_TIME"]

granule_ur = tree.find("GranuleUR")
input_granule_ur = granule_ur.text
granule_ur.text = granule_ur.text.replace("HLS", "HLS-VI")
set_additional_attribute(
tree.find("AdditionalAttributes"),
"Input_HLS_GranuleUR",
input_granule_ur,
)

time_format = "%Y-%m-%dT%H:%M:%S.%fZ"
formatted_date = datetime.now(timezone.utc).strftime(time_format)
Expand Down Expand Up @@ -125,6 +131,14 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:

tree.find("DataFormat").text = "COG"

append_fmask_online_access_urls(
tree.find("OnlineAccessURLs"),
input_granule_ur,
)

# ensure any added attributes are indented
ET.indent(tree)

with (
importlib_resources.files("hls_vi")
/ "schema"
Expand All @@ -142,7 +156,7 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None:
def normalize_additional_attributes(container: ElementBase) -> None:
"""Normalize additional attribute values.

On rare occassions, granule data is split and recombined upstream. When this
On rare occasions, granule data is split and recombined upstream. When this
occurs, the associated metadata is also split and recombined, resulting in values
for additional attributes that are created by joining the separate parts with the
string `" + "`.
Expand Down Expand Up @@ -193,6 +207,39 @@ def set_additional_attribute(attrs: ElementBase, name: str, value: str) -> None:
attrs.append(attr)


def append_fmask_online_access_urls(
    access_urls: ElementBase, hls_granule_ur: str
) -> None:
    """Append Fmask download links for the source HLS granule.

    Two `OnlineAccessURL` children (an HTTPS link followed by an S3 link), each
    holding a `URL` and a `URLDescription`, are appended to `access_urls`. Both
    point at the `<hls_granule_ur>.Fmask.tif` file so that users can locate the
    Fmask band without it being duplicated into the HLS-VI product. See
    https://github.com/NASA-IMPACT/hls-vi/issues/47

    Args:
        access_urls: Element that receives the new `OnlineAccessURL` children.
        hls_granule_ur: GranuleUR of the input HLS granule.
    """
    # Granule URs starting with "HLS.L30" use the L30 prefix; anything else
    # falls back to the S30 prefix.
    if hls_granule_ur.startswith("HLS.L30"):
        prefix = "HLSL30.020"
    else:
        prefix = "HLSS30.020"

    fmask_file = f"{hls_granule_ur}.Fmask.tif"
    object_key = f"{prefix}/{hls_granule_ur}/{fmask_file}"

    # (URL, description) pairs, in the order they are appended.
    links = (
        (
            f"https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/{object_key}",  # noqa: E501
            f"Download Fmask quality layer {fmask_file}",
        ),
        (
            f"s3://lp-prod-protected/{object_key}",
            f"This link provides direct download access via S3 to the Fmask quality layer {fmask_file}",  # noqa: E501
        ),
    )

    for href, blurb in links:
        url = Element("URL", None, None)
        url.text = href
        description = Element("URLDescription", None, None)
        description.text = blurb

        entry = Element("OnlineAccessURL", None, None)
        entry.append(url)
        entry.append(description)
        access_urls.append(entry)


def parse_args() -> Tuple[Path, Path]:
short_options = "i:o:"
long_options = ["instrument=", "inputdir=", "outputdir="]
Expand Down
3 changes: 2 additions & 1 deletion hls_vi/schema/Granule.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="GranuleDelete" type="GranuleDelete">
</xs:element>
<xs:element name="GranuleUR" type="GranuleUR"></xs:element>
<xs:element name="Input_GranuleUR" type="GranuleUR"></xs:element>
<xs:complexType name="GranuleMetaDataFile">
<xs:annotation>
<xs:documentation />
Expand Down Expand Up @@ -1298,7 +1299,7 @@ xmlns:xs="http://www.w3.org/2001/XMLSchema">
type="ListOfAdditionalAttributeValues">
<xs:annotation>
<xs:documentation>The ordered list of values of the
additioanl attribute for this granule. The values will be
additional attribute for this granule. The values will be
kept in the order which they appear.</xs:documentation>
</xs:annotation>
</xs:element>
Expand Down
2 changes: 1 addition & 1 deletion hls_vi/schema/MetadataCommon.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@
</xs:sequence>
</xs:choice>
</xs:complexType>
<!-- #mark Emtpy Type -->
<!-- #mark Empty Type -->
<xs:complexType name="EmptyType">
<xs:annotation>
<xs:documentation>The element should contain no children. In
Expand Down
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
"dataclasses",
"geojson",
"importlib_resources",
"lxml==3.6.0",
"numpy~=1.19.0",
"lxml==5.3.0",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just noticed there was a PR to pin this (#34), but I don't know the backstory.

It looks like there (shockingly) is a wheel for lxml==5.3.0 for py36, so maybe this is fine? I'm guessing the pin to an older version was to match the old version of the C library in our container. Maybe we can bump this to use the binary and then drop the libxml2-dev and libxslt1-dev?

# 1.19 is the latest for our container, but ~=1.19 allows
# a more recent 1.x version for our host
"numpy~=1.19",
"pystac[validation]==1.0.0rc2",
"rasterio",
"shapely",
Expand Down
16 changes: 15 additions & 1 deletion tests/fixtures/HLS-VI.L30.T06WVS.2024120T211159.v2.0.cmr.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<DataGranule>
<ProducerGranuleId>HLS-VI.L30.T06WVS.2024120T211159</ProducerGranuleId>
<DayNightFlag>DAY</DayNightFlag>
<ProductionDateTime>UPDATE HLS Prodution DATETIME</ProductionDateTime>
<ProductionDateTime>UPDATE HLS Production DATETIME</ProductionDateTime>
<LocalVersionId>2.0</LocalVersionId>
</DataGranule>
<Temporal>
Expand Down Expand Up @@ -240,8 +240,22 @@
<Value>https://doi.org</Value>
</Values>
</AdditionalAttribute>
<AdditionalAttribute>
<Name>Input_HLS_GranuleUR</Name>
<Values>
<Value>HLS.L30.T06WVS.2024120T211159.v2.0</Value>
</Values>
</AdditionalAttribute>
</AdditionalAttributes>
<OnlineAccessURLs>
<OnlineAccessURL>
<URL>https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T06WVS.2024120T211159.v2.0/HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URL>
<URLDescription>Download Fmask quality layer HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
<OnlineAccessURL>
<URL>s3://lp-prod-protected/HLSL30.020/HLS.L30.T06WVS.2024120T211159.v2.0/HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URL>
<URLDescription>This link provides direct download access via S3 to the Fmask quality layer HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
</OnlineAccessURLs>
<OnlineResources>
</OnlineResources>
Expand Down
16 changes: 15 additions & 1 deletion tests/fixtures/HLS-VI.S30.T13RCN.2024128T173909.v2.0.cmr.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<DataGranule>
<ProducerGranuleId>HLS-VI.S30.T13RCN.2024128T173909</ProducerGranuleId>
<DayNightFlag>DAY</DayNightFlag>
<ProductionDateTime>UPDATE HLS Prodution DATETIME</ProductionDateTime>
<ProductionDateTime>UPDATE HLS Production DATETIME</ProductionDateTime>
<LocalVersionId>2.0</LocalVersionId>
</DataGranule>
<Temporal>
Expand Down Expand Up @@ -302,8 +302,22 @@
<Value>https://doi.org</Value>
</Values>
</AdditionalAttribute>
<AdditionalAttribute>
<Name>Input_HLS_GranuleUR</Name>
<Values>
<Value>HLS.S30.T13RCN.2024128T173909.v2.0</Value>
</Values>
</AdditionalAttribute>
</AdditionalAttributes>
<OnlineAccessURLs>
<OnlineAccessURL>
<URL>https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13RCN.2024128T173909.v2.0/HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URL>
<URLDescription>Download Fmask quality layer HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
<OnlineAccessURL>
<URL>s3://lp-prod-protected/HLSS30.020/HLS.S30.T13RCN.2024128T173909.v2.0/HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URL>
<URLDescription>This link provides direct download access via S3 to the Fmask quality layer HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif</URLDescription>
</OnlineAccessURL>
</OnlineAccessURLs>
<OnlineResources>
</OnlineResources>
Expand Down