Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for tdx search #14

Merged
merged 1 commit into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions src/coincident/datasets/csda.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any

from coincident.datasets.general import Dataset

Expand Down Expand Up @@ -53,3 +54,19 @@ class TDX(Dataset):
end: str | None = None
type: str = "sar"
provider: str = "csda"
stac_kwargs: dict[str, Any] = field(
default_factory=lambda: {
"limit": 1000,
"filter": {
"op": "and",
"args": [
# exclude PAZ, only SSC products
{
"op": "in",
"args": [{"property": "platform"}, ["TDX-1", "TSX-1"]],
},
{"op": "=", "args": [{"property": "sar:product_type"}, "SSC"]},
],
},
}
)
36 changes: 30 additions & 6 deletions src/coincident/search/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from typing import Any

import geopandas as gpd
Expand Down Expand Up @@ -51,10 +52,10 @@ def search(
try:
dataset = _alias_to_Dataset[dataset]
except KeyError as e:
message = (
msg_unsupported = (
f"{dataset} is not a supported dataset: {_alias_to_Dataset.keys()}"
)
raise ValueError(message) from e
raise ValueError(msg_unsupported) from e

# Validate Datetimes
_validate_temporal_bounds(dataset, datetime)
Expand All @@ -81,6 +82,11 @@ def search(
shapely_geometry = shapely_geometry.reverse()
aoi = _pystac_client._format_intersects(shapely_geometry) # to JSON geometry
else:
if "bbox" not in kwargs:
msg_unconstrained = (
"Neither `bbox` nor `intersects` provided... search will be global"
)
warnings.warn(msg_unconstrained, stacklevel=2)
aoi = None

# STAC API Searches
Expand All @@ -94,8 +100,8 @@ def search(
if dataset.provider == "maxar":
# NOTE: not sure how to avoid incompatible type "str | None"; expected "str" for Dataset.attrs
client = stac.configure_maxar_client(dataset.area_based_calc) # type: ignore[attr-defined]
results = stac.search(client, **stac_api_kwargs)
gf = stac.to_geopandas(results)
item_collection = stac.search(client, **stac_api_kwargs)
gf = stac.to_geopandas(item_collection)
# Client-side reduce to only acquisitions having stereo pairs
gf = gf.loc[gf.stereo_pair_identifiers.str[0].dropna().index]

Expand All @@ -110,8 +116,26 @@ def search(
# Generic STAC endpoint w/o additional config
else:
client = stac.configure_stac_client(dataset.search) # type: ignore[arg-type]
results = stac.search(client, **stac_api_kwargs)
gf = stac.to_geopandas(results)
item_collection = stac.search(client, **stac_api_kwargs)

# Per-dataset munging
# https://github.com/uw-cryo/coincident/issues/8#issuecomment-2449810481
if dataset.alias == "tdx":
    # Drop columns with messy schema so the items can be converted to a
    # GeoDataFrame afterwards.
    dropcols = [
        "sceneInfo",
        "missionInfo",
        "previewInfo",
        "imageDataInfo",
        "generationInfo",
        "acquisitionInfo",
        "productVariantInfo",
    ]
    for item in item_collection:
        for col in dropcols:
            # Pass a default so an item that lacks one of these keys does
            # not abort the whole search with a KeyError.
            item.properties.pop(col, None)

gf = stac.to_geopandas(item_collection)

# Non-STAC Searches
elif dataset.alias == "3dep":
Expand Down
29 changes: 28 additions & 1 deletion src/coincident/search/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,32 @@
def to_geopandas(
collection: pystac.item_collection.ItemCollection,
) -> gpd.GeoDataFrame:
"""Convert returned from STAC API to geodataframe via arrow"""
"""
Convert a STAC ItemCollection to a GeoDataFrame.
This function converts a given STAC ItemCollection to a GeoDataFrame using the
`stac_geoparquet.arrow.parse_stac_items_to_arrow` method. It also adds an additional
column 'dayofyear' for convenience.

Parameters
----------
collection : pystac.item_collection.ItemCollection
The STAC ItemCollection to be converted.

Returns
-------
gpd.GeoDataFrame
A GeoDataFrame containing the data from the STAC ItemCollection.

Raises
------
ValueError
If the provided ItemCollection is empty.
"""
# Catch if no items are passed
if len(collection) == 0:
message = "ItemCollection is empty, cannot convert to GeoDataFrame"
raise ValueError(message)

record_batch_reader = stac_geoparquet.arrow.parse_stac_items_to_arrow(collection)
gf = gpd.GeoDataFrame.from_arrow(record_batch_reader) # doesn't keep arrow dtypes

Expand Down Expand Up @@ -65,6 +90,8 @@ def search(
client: pystac_client.client.Client, **kwargs: dict[str, Any] | None
) -> pystac_client.item_search.ItemSearch:
"""Search any STAC API (e.g. https://github.com/nasa/cmr-stac)"""
# NOTE: add logging for kwargs?
# print(kwargs)
results = client.search(
**kwargs,
)
Expand Down
11 changes: 11 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ def test_polygon_invalid_type():
m.search.search(dataset="3dep", intersects="-120, 40, -121, 41")


def test_to_geopandas_empty_search_result():
    """to_geopandas raises ValueError when handed an empty collection."""
    no_items = []
    expect_empty_error = pytest.raises(ValueError, match="ItemCollection is empty")
    with expect_empty_error:
        m.search.stac.to_geopandas(no_items)


# Without bbox/intersects the call proceeds to a real STAC API search after
# warning, so mark it like the other tests that need internet access.
@network
def test_unconstrained_search_warns():
    """A search with neither `bbox` nor `intersects` warns that it is global."""
    # warnings.warn() is called without a category, so it emits UserWarning;
    # name it explicitly rather than relying on pytest's default.
    with pytest.warns(UserWarning, match="Neither `bbox` nor `intersects` provided"):
        m.search.search(dataset="tdx")


# TODO: add more assertions / tests for this section
@network
@pytest.mark.filterwarnings("ignore:Server does not conform")
Expand Down Expand Up @@ -98,6 +108,7 @@ def test_gedi_search(aoi):
def test_tdx_search(aoi):
    """TDX search over `aoi` for 2009-2020 returns 48 items, all SSC products."""
    gf = m.search.search(dataset="tdx", intersects=aoi, datetime=["2009", "2020"])
    assert len(gf) == 48
    # Compare element-wise and reduce with .all(): asserting on
    # `.unique() == "SSC"` evaluates an array, which raises an ambiguous
    # truth-value ValueError (not a clean failure) if several product types
    # come back.
    assert (gf["sar:product_type"] == "SSC").all()


# MS PLANETARY COMPUTER
Expand Down
Loading