basic data management, search, tests, ruff
scottyhq committed Sep 25, 2024
1 parent 66a15d6 · commit fb5893e
Showing 18 changed files with 3,061 additions and 1,324 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
@@ -41,7 +41,8 @@ jobs:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
        runs-on: [ubuntu-latest, windows-latest, macos-14]
        # windows-latest,
        runs-on: [ubuntu-latest, macos-14]
        #include:
        # - python-version: "pypy-3.10"
        #   runs-on: ubuntu-latest
17 changes: 15 additions & 2 deletions README.md
@@ -27,6 +27,7 @@ calibration/validation, fusion algorithm development, and discipline-specific
scientific analysis.

**This tool is under active development, there are no stable releases yet!**
https://science.nasa.gov/earth-science/decadal-surveys/decadal-stv/coincident-datasets/

## Development

@@ -45,7 +46,19 @@ pixi run lint
pixi run docs
```

### Acknowledgements
## Authentication

Some datasets require authentication to _search_ (Maxar); others only require
authentication to _download_ data (NASA). `coincident` assumes you have the
following environment variables defined:

```bash
export EARTHDATA_USERNAME=xxxxx
export EARTHDATA_PASSWORD=yyyyy
export MAXAR_API_KEY=zzzzz
```
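
As an illustration, a minimal sketch of how a script built on `coincident` might fail fast when credentials are missing (the `require_env` helper is hypothetical, not part of the package):

```python
import os

def require_env(*names: str) -> None:
    """Hypothetical helper: raise early if expected credentials are unset."""
    missing = [name for name in names if not os.environ.get(name)]
    if missing:
        raise OSError(f"Missing environment variables: {', '.join(missing)}")

require_env("EARTHDATA_USERNAME", "EARTHDATA_PASSWORD")  # NASA downloads
require_env("MAXAR_API_KEY")  # Maxar search
```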

- Python packaging template provided by https://github.com/scientific-python/cookie
### Acknowledgements

- Python packaging template provided by
https://github.com/scientific-python/cookie
3,605 changes: 2,296 additions & 1,309 deletions pixi.lock

Large diffs are not rendered by default.

50 changes: 40 additions & 10 deletions pyproject.toml
@@ -2,7 +2,6 @@
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"


[project]
name = "coincident"
authors = [
@@ -32,15 +31,19 @@ classifiers = [
]
dynamic = ["version"]
dependencies = [
"maxar-platform>=1.0.1,<2",
"geopandas>=1.0.1,<2",
"planetary-computer>=1.0.0,<2",
"stac-geoparquet>=0.6,<0.7",
"pystac-client>=0.8.3,<0.9",
]

[project.optional-dependencies]
dev = [
"ipykernel>=6.29.5,<7",
"mypy>=1.11.2,<2",
"pytest >=6",
"pytest-cov >=3",
"pre-commit>=3.8.0,<4"
"pre-commit>=3.8.0,<4",
]
docs = [
"sphinx>=7.0",
@@ -56,7 +59,6 @@ Homepage = "https://github.com/uw-cryo/coincident"
Discussions = "https://github.com/uw-cryo/coincident/discussions"
Changelog = "https://github.com/uw-cryo/coincident/releases"


[tool.hatch]
version.source = "vcs"
build.hooks.vcs.version-file = "src/coincident/_version.py"
@@ -65,19 +67,22 @@ build.hooks.vcs.version-file = "src/coincident/_version.py"
features = ["test"]
scripts.test = "pytest {args}"


[tool.pytest.ini_options]
minversion = "6.0"
addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
xfail_strict = true
filterwarnings = [
"error",
"ignore::DeprecationWarning:lark",
"ignore:maxar_plaform authentication error:UserWarning"
]
log_cli_level = "INFO"
testpaths = [
"tests",
]

markers = [
"network: marks test requiring internet (select with -o markers=network)",
]
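
For context, a network-marked test would look roughly like this (the test itself is illustrative, not from this commit):

```python
import pytest

@pytest.mark.network  # deselected by the default test task via -m 'not network'
def test_search_requires_internet():
    # Select only these with: pytest -o markers=network -m network
    ...
```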

[tool.coverage]
run.source = ["coincident"]
@@ -94,13 +99,15 @@ enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
warn_unreachable = true
disallow_untyped_defs = false
disallow_incomplete_defs = false
# NOTE: not sure why mypy is not finding dependency libraries, so disabling this check...
# error: Cannot find implementation or library stub for module named "geopandas" [import-not-found]
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "coincident.*"
disallow_untyped_defs = true
disallow_incomplete_defs = true


[tool.ruff]

[tool.ruff.lint]
@@ -140,7 +147,6 @@ isort.required-imports = ["from __future__ import annotations"]
"tests/**" = ["T20"]
"noxfile.py" = ["T20"]


[tool.pylint]
ignore-paths = [".*/_version.py"]
reports.output-format = "colorized"
@@ -154,7 +160,6 @@ messages_control.disable = [
"wrong-import-position",
]

# https://pixi.sh/latest/advanced/pyproject_toml/#initial-setup-of-the-pyprojecttoml-file
[tool.pixi.project]
channels = ["conda-forge"]
platforms = ["osx-arm64"]
@@ -168,17 +173,42 @@ dev = { features = ["dev"], solve-group = "default" }
docs = { features = ["docs"], solve-group = "default" }

[tool.pixi.tasks]
# Until a maxar-platform release supports pystac-client>0.8 # NOTE: this only goes into the default environment :(
postinstall = "uv pip install maxar-platform"

# NOTE: pixi will match project.dependencies versions above but get them from conda-forge
# If a package is listed in project.dependencies but not repeated here, it is installed from PyPI
[tool.pixi.dependencies]
uv = ">=0.4.14,<0.5"
geopandas = "*"
planetary-computer = "*"
pystac-client = "*"
stac-geoparquet = "*"
# NOTE: required to run notebooks in vscode
ipykernel = ">=6.29.5,<7"
rich = ">=13.8.1,<14" # Optional. convenient for rich.print(dataset)
pyarrow = "17.*"
fsspec = ">=2024.9.0,<2025"
libgdal-arrow-parquet = ">=3.9.2,<4"

[tool.pixi.feature.dev.dependencies]
pre-commit = "*"
pytest = "*"
ipykernel = "*"
mypy = "*"

[tool.pixi.feature.dev.tasks]
lint = "pre-commit run --all"
test = "pytest -ra --cov --cov-report=xml --cov-report=term"
# NOTE: environment variables picked up automatically
#test = {cmd = "pytest -o markers=network -m 'not network' --cov --cov-report=xml --cov-report=term", env = { MAXAR_API_KEY = "fake_key" }}
test = "pytest -o markers=network -m 'not network' --cov --cov-report=xml --cov-report=term"
networktest = "pytest --cov --cov-report=xml --cov-report=term"
postinstall = "uv pip install maxar-platform"

[tool.pixi.feature.docs.tasks]
docs = "python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs docs/_build/html"

# Isn't currently picked up by pixi, but works when uv is run as a postinstall task
[tool.uv]
# https://docs.astral.sh/uv/reference/settings/#override-dependencies
override-dependencies = ["pystac-client>=0.8"]
5 changes: 3 additions & 2 deletions src/coincident/__init__.py
@@ -6,6 +6,7 @@

from __future__ import annotations

from ._version import version as __version__
from coincident import datasets, overlaps, search
from coincident._version import version as __version__

__all__ = ["__version__"]
__all__ = ["__version__", "datasets", "search", "overlaps"]
29 changes: 29 additions & 0 deletions src/coincident/datasets/__init__.py
@@ -0,0 +1,29 @@
"""
Supported datasets
Files are organized by data provider, each of which can contain multiple datasets
ATL03_v6: https://cmr.earthdata.nasa.gov/search/concepts/C2596864127-NSIDC_CPRD
ATL06_v6: https://cmr.earthdata.nasa.gov/search/concepts/C2564427300-NSIDC_ECS
GEDI_2a_v2: https://cmr.earthdata.nasa.gov/search/concepts/C1908348134-LPDAAC_ECS # did this ID change?!
USGS_3DEP: https://cmr.earthdata.nasa.gov/search/concepts/C2021957295-LPCLOUD
WESM:
"""

from __future__ import annotations

from coincident.datasets import maxar, nasa, planetary_computer, usgs
from coincident.datasets.general import Dataset

# Convenience mapping of string aliases to supported dataset classes
_datasets = [
    maxar.Stereo(),
    usgs.ThreeDEP(),
    nasa.ICESat2(),
    nasa.GEDI(),
    planetary_computer.COP30(),
]
_aliases = [x.alias for x in _datasets]
_alias_to_Dataset = dict(zip(_aliases, _datasets, strict=False))

__all__ = ["Dataset", "usgs", "maxar", "nasa", "planetary_computer"]
32 changes: 32 additions & 0 deletions src/coincident/datasets/general.py
@@ -0,0 +1,32 @@
"""
Supported datasets:
ATL03_v6: https://cmr.earthdata.nasa.gov/search/concepts/C2596864127-NSIDC_CPRD
ATL06_v6: https://cmr.earthdata.nasa.gov/search/concepts/C2564427300-NSIDC_ECS
GEDI_2a_v2: https://cmr.earthdata.nasa.gov/search/concepts/C1908348134-LPDAAC_ECS
USGS_3DEP: https://cmr.earthdata.nasa.gov/search/concepts/C2021957295-LPCLOUD
"""

# from pydantic.dataclasses import dataclass, Field # type: ignore[attr-defined]
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any


# NOTE: default to None for all of these?
@dataclass
class Dataset:
    """Essential metadata for supported datasets"""

    alias: str | None = None  # nickname
    has_stac_api: bool | None = None  # whether or not it's a STAC API
    collections: list[str] = field(
        default_factory=list
    )  # STAC collection names of specific datasets
    search: str | None = None  # search API endpoint
    start: str | None = None  # first acquisition date
    end: str | None = None  # last acquisition date (or None if ongoing)
    type: str | None = None  # lidar | stereo | altimeter | sar
    # pystac-client default limit=100, but seems set by API endpoint as well (NASA cmr-stac=25)
    stac_kwargs: dict[str, Any] = field(default_factory=lambda: {"limit": 1000})
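
As a minimal usage sketch (field values here are made up), the dataclass can be instantiated directly or subclassed as the provider modules below do:

```python
from coincident.datasets.general import Dataset

demo = Dataset(
    alias="demo",
    has_stac_api=True,
    collections=["demo-collection"],  # hypothetical STAC collection
    start="2020-01-01",
    type="lidar",
)
print(demo.stac_kwargs)  # {'limit': 1000} (the default)
```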
37 changes: 37 additions & 0 deletions src/coincident/datasets/maxar.py
@@ -0,0 +1,37 @@
"""
Maxar VHR stereo imagery
https://www.maxar.com/maxar-intelligence/products/satellite-imagery
"""

# from pydantic.dataclasses import dataclass, Field # type: ignore[attr-defined]
from __future__ import annotations

from dataclasses import dataclass, field
from enum import Enum

from coincident.datasets.general import Dataset


class Collection(str, Enum):
    wv01 = "wv01"
    wv02 = "wv02"
    wv03_vnir = "wv03-vnir"
    ge01 = "ge01"


@dataclass
class Stereo(Dataset):
    """Essential metadata for Maxar In Track Stereo"""

    alias: str = "maxar"
    has_stac_api: bool = True
    collections: list[Collection] = field(
        default_factory=lambda: ["wv01", "wv02", "wv03-vnir", "ge01"]
    )  # type: ignore[assignment]
    search: str = "https://api.maxar.com/discovery/v1/search"
    start: str = "2007-01-01"
    end: str | None = None
    type: str = "stereo"
    # Unique to Maxar
    area_based_calc: bool = False
43 changes: 43 additions & 0 deletions src/coincident/datasets/nasa.py
@@ -0,0 +1,43 @@
"""
NASA Datasets
https://github.com/nasa/cmr-stac
"""

# from pydantic.dataclasses import dataclass
from __future__ import annotations

from dataclasses import dataclass, field

from coincident.datasets.general import Dataset


@dataclass
class ICESat2(Dataset):
    """Essential metadata for ICESat-2 Altimeter"""

    # https://cmr.earthdata.nasa.gov/stac/NSIDC_ECS/collections/ATL03_006
    has_stac_api: bool = True
    search: str = "https://cmr.earthdata.nasa.gov/stac/NSIDC_ECS"
    start: str | None = "2018-10-13"
    type: str = "altimeter"
    alias: str = "icesat-2"
    collections: list[str] = field(
        default_factory=lambda: ["ATL03_006"]
    )  # ATL08_006 etc.


@dataclass
class GEDI(Dataset):
    """Essential metadata for GEDI Altimeter"""

    # NOTE: parse temporal & bbox from collection metadata?
    # https://cmr.earthdata.nasa.gov/stac/LPCLOUD/collections/GEDI02_A_002
    has_stac_api: bool = True
    search: str = "https://cmr.earthdata.nasa.gov/stac/LPCLOUD"
    start: str = "2019-04-04"
    # https://www.earthdata.nasa.gov/news/nasa-announces-pause-gedi-mission
    end: str = "2023-03-17"
    type: str = "altimeter"
    alias: str = "gedi"
    collections: list[str] = field(default_factory=lambda: ["GEDI02_A_002"])
25 changes: 25 additions & 0 deletions src/coincident/datasets/neon.py
@@ -0,0 +1,25 @@
"""
NEON LiDAR data
https://data.neonscience.org/data-products/DP3.30024.001
NEON (National Ecological Observatory Network). Elevation - LiDAR (DP3.30024.001), provisional data. Dataset accessed from https://data.neonscience.org/data-products/DP3.30024.001 on September 11, 2024. Data archived at [your DOI].
"""

# from pydantic.dataclasses import dataclass
from __future__ import annotations

from dataclasses import dataclass

from coincident.datasets.general import Dataset


@dataclass
class NEON(Dataset):
    """Essential metadata for NEON"""

    has_stac_api: bool = False
    search: str = "https://data.neonscience.org/data-api"
    start: str = "2013-06-01"
    end: str = "2023-08-01"
    type: str = "lidar"
    alias: str = "neon"
24 changes: 24 additions & 0 deletions src/coincident/datasets/planetary_computer.py
@@ -0,0 +1,24 @@
"""
Microsoft Planetary Computer
https://planetarycomputer.microsoft.com/docs/quickstarts/reading-stac/
"""

from __future__ import annotations

from dataclasses import dataclass, field

from coincident.datasets.general import Dataset


@dataclass
class COP30(Dataset):
    """Essential metadata for Copernicus DEM"""

    alias: str = "cop30"
    has_stac_api: bool = True
    collections: list[str] = field(default_factory=lambda: ["cop-dem-glo-30"])
    search: str = "https://planetarycomputer.microsoft.com/api/stac/v1"
    start: str | None = None  # NOTE: has 'representative' datetime of 2021-04-22
    end: str | None = None
    type: str = "sar"