Refactor sliderule io #26

Merged · 5 commits · Nov 22, 2024

Changes from all commits
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -15,6 +15,7 @@ repos:
rev: "v4.6.0"
hooks:
- id: check-added-large-files
args: ["--maxkb=1000"]
- id: check-case-conflict
- id: check-merge-conflict
- id: check-symlinks
13 changes: 13 additions & 0 deletions docs/api.rst
@@ -43,3 +43,16 @@ Datasets
    general.Dataset
    maxar.open_browse
    maxar.plot_browse
+
+
+IO
+--
+
+.. currentmodule:: coincident.io
+
+.. autosummary::
+   :toctree: generated/
+
+   sliderule.subset_gedi02a
+   sliderule.subset_atl06
+   sliderule.sample_3dep
1 change: 1 addition & 0 deletions docs/conf.py
@@ -88,3 +88,4 @@
 always_document_param_types = True
 # autodoc_typehints = "none"
 nb_execution_mode = "auto"  # off, on
+nb_execution_excludepatterns = ["sliderule.ipynb"]
1 change: 1 addition & 0 deletions docs/examples/index.md
@@ -8,4 +8,5 @@ This section contains Jupyter Notebooks with narrative workflows using the

 quickstart
 cascading_search
+sliderule
 ```
483 changes: 483 additions & 0 deletions docs/examples/sliderule.ipynb

Large diffs are not rendered by default.

5,590 changes: 4,049 additions & 1,541 deletions pixi.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion pyproject.toml
@@ -63,6 +63,7 @@ docs = [
"myst-nb",
"myst_parser>=0.13",
"pydata-sphinx-theme>=0.16.0,<0.17",
#"rpds-py>=0.21.0,<0.22",
"sphinx>=7.0",
"sphinx_autodoc_typehints",
"sphinx_copybutton",
@@ -120,7 +121,7 @@ report.exclude_also = [
 ]

 [tool.codespell]
-skip="pixi.lock"
+skip="pixi.lock,docs/examples/sliderule.ipynb"

 [tool.mypy]
 files = ["src", "tests"]
@@ -220,6 +221,7 @@ rioxarray = "*"
 #s3fs = "*"
 stac-geoparquet = "*"
 pyarrow = "*"
+jsonschema = ">=4.23.0,<5"
 #nbconvert = ">=7.16.4,<8"
 #cloudpathlib-s3 = ">=0.20.0,<0.21"
 #matplotlib-base = ">=3.9.2,<4"
@@ -236,6 +238,7 @@ mypy = "*"
 # Testing additional dependencies
 #rich = ">=13.8.1,<14"  # Optional. convenient for rich.print(dataset)
 #xvec = ">=0.3.0,<0.4"
+sliderule = ">=4.7.1,<5"

 [tool.pixi.pypi-dependencies]
 coincident = { path = ".", editable = false }
1 change: 0 additions & 1 deletion src/coincident/datasets/maxar.py
@@ -79,7 +79,6 @@ async def download_item(
     return item  # noqa: RET504


-@depends_on_optional("matplotlib")
 def open_browse(item: pystac.Item, overview_level: int = 0) -> xr.DataArray:
     """
     Open a browse image from a STAC item using the specified overview level.
4 changes: 2 additions & 2 deletions src/coincident/io/__init__.py
@@ -4,6 +4,6 @@

 from __future__ import annotations

-from coincident.io.sliderule import load_gedi, load_icesat2
+from coincident.io import sliderule

-__all__ = ["load_icesat2", "load_gedi"]
+__all__ = ["sliderule"]
188 changes: 138 additions & 50 deletions src/coincident/io/sliderule.py
@@ -1,70 +1,158 @@
"""
Read ICESat-2 & GEDI using Sliderule
Subset and Sample using Sliderule
https://slideruleearth.io
"""

from __future__ import annotations

import warnings
from pathlib import Path
from typing import Any

import geopandas as gpd
import pandas as pd

try:
from sliderule import gedi, icesat2, toregion
from sliderule import earthdata, gedi, icesat2, raster, toregion
except ImportError:
warnings.warn(
"'sliderule' package not found. Install for GEDI & ICESat2 functionality: https://slideruleearth.io/web/rtd/getting_started/Install.html",
stacklevel=2,
)
from coincident._utils import depends_on_optional

# TODO: make these configurable?
default_params = {
"ats": 20.0,
"cnt": 5,
"len": 40.0,
"res": 20.0,
"maxi": 6,
}


@depends_on_optional("sliderule.gedi")
def load_gedi(
aoi: gpd.GeoDataFrame,
start_datetime: pd.Timestamp,
end_datetime: pd.Timestamp,
quality_flags: bool = True,
**kwargs: Any,

def _gdf_to_sliderule_polygon(gf: gpd.GeoDataFrame) -> list[dict[str, float]]:
# Ignore type necessary I think b/c sliderule doesn't have type hints?
return toregion(gf[["geometry"]].dissolve())["poly"] # type: ignore[no-any-return]


def _gdf_to_sliderule_params(gf: gpd.GeoDataFrame) -> dict[str, Any]:
# Sliderule expects single 'geometry' column
# Dissolve creates a single multipolygon from all geometries
sliderule_aoi = _gdf_to_sliderule_polygon(gf)
params = {}
params["poly"] = sliderule_aoi
params["t0"] = gf.start_datetime.min().tz_localize(None).isoformat()
params["t1"] = gf.end_datetime.max().tz_localize(None).isoformat()
return params


def _granule_from_assets(assets: gpd.GeoDataFrame) -> str:
# NOTE: change to while loop in case tons of assets?
for _k, v in assets.items():
if v.get("roles") == "data":
granule = Path(v.get("href")).name

return granule


@depends_on_optional("sliderule")
def subset_gedi02a(
gf: gpd.GeoDataFrame,
aoi: gpd.GeoDataFrame = None,
# NOTE: investigate B006 best-practice
sliderule_params: dict[str, Any] = {}, # noqa: B006
) -> gpd.GeoDataFrame:
# NOTE: allow passing private cluster?
gedi.init("slideruleearth.io")
params = default_params.copy()
if quality_flags:
params["degrade_flag"] = 0
params["l2_quality_flag"] = 1
params["poly"] = toregion(aoi)["poly"]
params["t0"] = start_datetime.tz_localize(None).isoformat()
params["t1"] = end_datetime.tz_localize(None).isoformat()
params.update(kwargs)
return gedi.gedi02ap(params)


@depends_on_optional("sliderule.icesat2")
def load_icesat2(
aoi: gpd.GeoDataFrame,
start_datetime: pd.Timestamp,
end_datetime: pd.Timestamp,
**kwargs: Any,
"""
Subsets GEDI L2A data based on a given GeoDataFrame and optional area of interest (AOI).

Parameters
----------
gf : gpd.GeoDataFrame
A GeoDataFrame of results from coincident.search.search(dataset='gedi')

aoi : gpd.GeoDataFrame, optional
A GeoDataFrame with a POLYGON to subset. If not provided the union of geometries in gf is used.

sliderule_params : dict[Any], optional
A dictionary of additional parameters to be passed to the sliderule `gedi.gedi02ap`.

Returns
-------
gpd.GeoDataFrame
A GeoDataFrame containing the subsetted GEDI L2A data.

Notes
-----
The function sets `degrade_filter=True` and `l2_quality_filter=True` by default.
"""
params = _gdf_to_sliderule_params(gf)

granule_names = gf.assets.apply(_granule_from_assets).to_list()

if aoi is not None:
params["poly"] = _gdf_to_sliderule_polygon(aoi)

params.update(
{
"degrade_filter": True,
"l2_quality_filter": True,
}
)

# User-provided parameters take precedence
params.update(sliderule_params)

return gedi.gedi02ap(params, resources=granule_names)


@depends_on_optional("sliderule")
def subset_atl06(
gf: gpd.GeoDataFrame,
aoi: gpd.GeoDataFrame = None,
dropna: bool = True,
sliderule_params: dict[str, Any] = {}, # noqa: B006
) -> gpd.GeoDataFrame:
# NOTE: allow passing private cluster?
# NOTE: deal with kwargs
icesat2.init("slideruleearth.io")
params = default_params.copy()
params["srt"] = icesat2.SRT_LAND
params["cnf"] = icesat2.CNF_SURFACE_HIGH
params["poly"] = toregion(aoi)["poly"]
params["t0"] = start_datetime.tz_localize(None).isoformat()
params["t1"] = end_datetime.tz_localize(None).isoformat()
params.update(kwargs)
return icesat2.atl06p(params)
params = _gdf_to_sliderule_params(gf)

# Note necessary but avoids another CMR search
granule_names = gf.assets.apply(_granule_from_assets).to_list()

if aoi is not None:
params["poly"] = _gdf_to_sliderule_polygon(aoi)

# User-provided parameters take precedence
params.update(sliderule_params)

data = icesat2.atl06sp(params, resources=granule_names)

# NOTE: add server-side filtering for NaNs in sliderule.atl06sp?
if dropna:
return data.dropna(subset=["h_li"])
return data


@depends_on_optional("sliderule")
def sample_worldcover(gf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
points = [[x, y] for x, y in zip(gf.geometry.x, gf.geometry.y, strict=False)]
return raster.sample("esa-worldcover-10meter", points)


@depends_on_optional("sliderule")
def sample_3dep(gf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
"""
Sample 3DEP 1-meter DEM with POINT geometries in GeoDataFrame.

Points should be (longitude, latitude) not UTM

Parameters
----------
gf : gpd.GeoDataFrame
A GeoDataFrame containing POINT geometries

Returns
-------
gpd.GeoDataFrame
A GeoDataFrame with sampled elevation data from the 3DEP 1-meter DEM.
"""
points = [[x, y] for x, y in zip(gf.geometry.x, gf.geometry.y, strict=False)]

poly = _gdf_to_sliderule_polygon(gf)
geojson = earthdata.tnm(
short_name="Digital Elevation Model (DEM) 1 meter", polygon=poly
)

# NOTE: make sliderule aware of 3dep polygons to not sample NaN regions for 1m grid?
data_3dep = raster.sample("usgs3dep-1meter-dem", points, {"catalog": geojson})

return data_3dep[data_3dep.value.notna()]
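Taken together, the module moves from date-range loaders to granule-scoped subsetting driven by a prior search result. A hypothetical end-to-end sketch; the `intersects=` argument to `coincident.search.search` and the `aoi.geojson` path are assumptions, not part of this diff:

```python
import coincident
import geopandas as gpd
from coincident.io import sliderule

# Hypothetical AOI: a single lon/lat POLYGON
aoi = gpd.read_file("aoi.geojson")

# Rows carry start_datetime/end_datetime and assets, which the subset helpers
# translate into sliderule params and granule names
gf = coincident.search.search(dataset="gedi", intersects=aoi)

# Server-side subset of GEDI L2A footprints; degrade_filter and
# l2_quality_filter are enabled by default
gedi_pts = sliderule.subset_gedi02a(gf, aoi=aoi)

# Sample the 3DEP 1-meter DEM at the returned POINT geometries (lon/lat)
samples = sliderule.sample_3dep(gedi_pts)
```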
5 changes: 5 additions & 0 deletions src/coincident/search/main.py
@@ -147,6 +147,11 @@ def search(
         **kwargs,
     )

+    # Keep track of dataset alias in geodataframe metadata
+    # NOTE: attrs not always retained and not saved to file
+    # https://github.com/geopandas/geopandas/issues/3320
+    gf.attrs["dataset"] = dataset.alias
+
     return gf
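Since `.attrs` are not reliably preserved (see the linked geopandas issue), the alias is best treated as advisory metadata. A minimal sketch, assuming `gf` came from `coincident.search.search`:

```python
# attrs travel with in-memory copies but are not saved to file
dataset_alias = gf.attrs.get("dataset")  # e.g. "gedi"; may be None downstream
```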


18 changes: 18 additions & 0 deletions src/coincident/search/stac.py
@@ -25,6 +25,21 @@
 except maxar_platform.session.NoSessionCredentials:
     msg_noauth = "Unable to authenticate with Maxar API. Please set MAXAR_API_KEY environment variable."
     warnings.warn(msg_noauth, stacklevel=2)
+except maxar_platform.exceptions.UnAuthorizedException:
+    msg_noauth = (
+        "Unable to authenticate with Maxar API. Please check MAXAR_API_KEY is valid."
+    )
+    warnings.warn(msg_noauth, stacklevel=2)
+
+
+def _filter_assets(assets: gpd.GeoDataFrame) -> dict[str, str]:
+    """Remove key:None pairs from assets"""
+    keep_keys = []
+    for k, v in assets.items():
+        if v is not None:
+            keep_keys.append(k)
+
+    return {key: assets[key] for key in keep_keys}


 def to_geopandas(
@@ -59,6 +74,9 @@ def to_geopandas(
     record_batch_reader = stac_geoparquet.arrow.parse_stac_items_to_arrow(collection)
     gf = gpd.GeoDataFrame.from_arrow(record_batch_reader)  # doesn't keep arrow dtypes

+    # Workaround stac-geoparquet limitation https://github.com/stac-utils/stac-geoparquet/issues/82
+    gf["assets"] = gf["assets"].apply(_filter_assets)
+
     # Additional columns for convenience
     # NOTE: these become entries under STAC properties
     gf["dayofyear"] = gf["datetime"].dt.dayofyear
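`_filter_assets` works around stac-geoparquet materializing every asset key for every item and filling missing ones with None. A minimal illustration with a hypothetical assets record (equivalent to what the helper computes):

```python
assets = {"data": {"href": "GEDI02_A_granule.h5", "roles": "data"}, "browse": None}

filtered = {k: v for k, v in assets.items() if v is not None}
# -> {"data": {"href": "GEDI02_A_granule.h5", "roles": "data"}}
```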
2 changes: 2 additions & 0 deletions src/coincident/search/wesm.py
@@ -58,6 +58,8 @@ def stacify_column_names(gf: GeoDataFrame) -> GeoDataFrame:
"collect_end": "end_datetime",
}
gf = gf.rename(columns=name_map)
# Add collection name to match STAC (used to identify geodataframe contents in other functions)
gf["collection"] = "3DEP"
gf["start_datetime"] = pd.to_datetime(gf["start_datetime"])
gf["end_datetime"] = pd.to_datetime(gf["end_datetime"])
duration = gf.end_datetime - gf.start_datetime
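The hard-coded `collection` value gives WESM results the same identifying column that STAC results carry. A hypothetical downstream check:

```python
# Branch on the provenance of search results (column added in this hunk)
if "collection" in gf.columns and (gf["collection"] == "3DEP").all():
    print("rows came from the USGS WESM catalog")
```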