Skip to content

Commit

Permalink
fixes for tdx
Browse files Browse the repository at this point in the history
  • Loading branch information
scottyhq committed Oct 31, 2024
1 parent 3960508 commit 9911087
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions src/coincident/datasets/csda.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any

from coincident.datasets.general import Dataset

Expand Down Expand Up @@ -53,3 +54,19 @@ class TDX(Dataset):
end: str | None = None  # open-ended: no fixed end date for the mission catalog
type: str = "sar"
provider: str = "csda"
# Default keyword arguments forwarded to the STAC API search for TanDEM-X.
# The CQL2-JSON filter narrows results server-side; NOTE(review): the CSDA
# TanDEM-X collection apparently also contains PAZ scenes and non-SSC product
# types (per the inline note below) — confirm against the CSDA catalog.
stac_kwargs: dict[str, Any] = field(
    default_factory=lambda: {
        # page size per request
        "limit": 1000,
        "filter": {
            "op": "and",
            "args": [
                # exclude PAZ, only SSC products
                {
                    "op": "in",
                    "args": [{"property": "platform"}, ["TDX-1", "TSX-1"]],
                },
                {"op": "=", "args": [{"property": "sar:product_type"}, "SSC"]},
            ],
        },
    }
)
36 changes: 30 additions & 6 deletions src/coincident/search/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from typing import Any

import geopandas as gpd
Expand Down Expand Up @@ -51,10 +52,10 @@ def search(
try:
dataset = _alias_to_Dataset[dataset]
except KeyError as e:
message = (
msg_unsupported = (
f"{dataset} is not a supported dataset: {_alias_to_Dataset.keys()}"
)
raise ValueError(message) from e
raise ValueError(msg_unsupported) from e

# Validate Datetimes
_validate_temporal_bounds(dataset, datetime)
Expand All @@ -81,6 +82,11 @@ def search(
shapely_geometry = shapely_geometry.reverse()
aoi = _pystac_client._format_intersects(shapely_geometry) # to JSON geometry
else:
if "bbox" not in kwargs:
msg_unconstrained = (
"Neither `bbox` nor `intersects` provided... search will be global"
)
warnings.warn(msg_unconstrained, stacklevel=2)
aoi = None

# STAC API Searches
Expand All @@ -94,8 +100,8 @@ def search(
if dataset.provider == "maxar":
# NOTE: not sure how to avoid incompatible type "str | None"; expected "str" for Dataset.attrs
client = stac.configure_maxar_client(dataset.area_based_calc) # type: ignore[attr-defined]
results = stac.search(client, **stac_api_kwargs)
gf = stac.to_geopandas(results)
item_collection = stac.search(client, **stac_api_kwargs)
gf = stac.to_geopandas(item_collection)
# Client-side reduce to only acquisitions having stereo pairs
gf = gf.loc[gf.stereo_pair_identifiers.str[0].dropna().index]

Expand All @@ -110,8 +116,26 @@ def search(
# Generic STAC endpoint w/o additional config
else:
client = stac.configure_stac_client(dataset.search) # type: ignore[arg-type]
results = stac.search(client, **stac_api_kwargs)
gf = stac.to_geopandas(results)
item_collection = stac.search(client, **stac_api_kwargs)

# Per-dataset munging
# https://github.com/uw-cryo/coincident/issues/8#issuecomment-2449810481
if dataset.alias == "tdx":
# Drop columns with messy schema
dropcols = [
"sceneInfo",
"missionInfo",
"previewInfo",
"imageDataInfo",
"generationInfo",
"acquisitionInfo",
"productVariantInfo",
]
for item in item_collection:
for col in dropcols:
item.properties.pop(col)

gf = stac.to_geopandas(item_collection)

# Non-STAC Searches
elif dataset.alias == "3dep":
Expand Down
29 changes: 28 additions & 1 deletion src/coincident/search/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,32 @@
def to_geopandas(
collection: pystac.item_collection.ItemCollection,
) -> gpd.GeoDataFrame:
"""Convert returned from STAC API to geodataframe via arrow"""
"""
Convert a STAC ItemCollection to a GeoDataFrame.
This function converts a given STAC ItemCollection to a GeoDataFrame using the
`stac_geoparquet.arrow.parse_stac_items_to_arrow` method. It also adds an additional
column 'dayofyear' for convenience.
Parameters
----------
collection : pystac.item_collection.ItemCollection
The STAC ItemCollection to be converted.
Returns
-------
gpd.GeoDataFrame
A GeoDataFrame containing the data from the STAC ItemCollection.
Raises
------
ValueError
If the provided ItemCollection is empty.
"""
# Catch if no items are passed
if len(collection) == 0:
message = "ItemCollection is empty, cannot convert to GeoDataFrame"
raise ValueError(message)

record_batch_reader = stac_geoparquet.arrow.parse_stac_items_to_arrow(collection)
gf = gpd.GeoDataFrame.from_arrow(record_batch_reader) # doesn't keep arrow dtypes

Expand Down Expand Up @@ -65,6 +90,8 @@ def search(
client: pystac_client.client.Client, **kwargs: dict[str, Any] | None
) -> pystac_client.item_search.ItemSearch:
"""Search any STAC API (e.g. https://github.com/nasa/cmr-stac)"""
# NOTE: add logging for kwargs?
# print(kwargs)
results = client.search(
**kwargs,
)
Expand Down
11 changes: 11 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ def test_polygon_invalid_type():
m.search.search(dataset="3dep", intersects="-120, 40, -121, 41")


def test_to_geopandas_empty_search_result():
    """Converting an empty ItemCollection must raise, not return an empty frame."""
    expected_message = "ItemCollection is empty"
    no_items: list = []
    with pytest.raises(ValueError, match=expected_message):
        m.search.stac.to_geopandas(no_items)


def test_unconstrained_search_warns():
    """A search with neither `bbox` nor `intersects` should warn about being global."""
    warning_pattern = "Neither `bbox` nor `intersects` provided"
    with pytest.warns(match=warning_pattern):
        m.search.search(dataset="tdx")


# TODO: add more assertions / tests for this section
@network
@pytest.mark.filterwarnings("ignore:Server does not conform")
Expand Down Expand Up @@ -98,6 +108,7 @@ def test_gedi_search(aoi):
def test_tdx_search(aoi):
    """TDX search over the fixture AOI returns the pinned result count, all SSC products."""
    gf = m.search.search(dataset="tdx", intersects=aoi, datetime=["2009", "2020"])
    # Pinned to the known catalog contents for this AOI/date range
    assert len(gf) == 48
    # Check the whole column rather than `.unique() == "SSC"`: comparing an
    # ndarray to a scalar is elementwise, so with more than one unique value
    # the bare assert would raise "truth value is ambiguous" (ValueError)
    # instead of failing with a clear assertion message.
    assert (gf["sar:product_type"] == "SSC").all()


# MS PLANETARY COMPUTER
Expand Down

0 comments on commit 9911087

Please sign in to comment.