diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..1779d39af --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,50 @@ +name: Test + +on: + pull_request: + branches: + - v2 # TODO: Remove! + - development + - main + push: + branches: + - main + - development + + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.12"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + + - name: Install package and test dependencies + run: | + # TODO: Once we have expressed dependencies in a more standard way: + # python -m pip install .[test] + python -m pip install . + python -m pip install -r requirements-dev.txt + + - name: Type-check package + run: mypy + + - name: Unit test + # TODO: Test behind EDL; coverage is measured for the icepyx package + run: pytest icepyx/ --verbose --cov icepyx --ignore=icepyx/tests/test_behind_NSIDC_API_login.py --ignore=icepyx/tests/test_auth.py + + - name: Upload coverage report + uses: codecov/codecov-action@v4.5.0 + with: + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/icepyx/core/APIformatting.py b/icepyx/core/APIformatting.py index 90b7f3ac6..4a040dacd 100644 --- a/icepyx/core/APIformatting.py +++ b/icepyx/core/APIformatting.py @@ -1,6 +1,9 @@ -# Generate and format information for submitting to API (CMR and NSIDC) +"""Generate and format information for submitting to API (CMR and NSIDC).""" import datetime as dt +from typing import Any, Generic, Literal, TypeVar, overload + +from icepyx.core.types import CMRParams, EGISpecificParams, EGISpecificParamsSubset # ---------------------------------------------------------------------- @@ -122,6 +125,9 @@ def combine_params(*param_dicts): """ Combine multiple dictionaries into one. 
+ Merging is performed in sequence using `dict.update()`; dictionaries later in the + list overwrite those earlier. + Parameters ---------- params : dictionaries @@ -129,7 +135,7 @@ def combine_params(*param_dicts): Returns ------- - single dictionary of all input dictionaries combined + A single dictionary of all input dictionaries combined Examples -------- @@ -181,12 +187,56 @@ def to_string(params): return "&".join(param_list) +ParameterType = Literal["CMR", "required", "subset"] +# DevGoal: When Python 3.12 is minimum supported version, migrate to PEP695 style +T = TypeVar("T", contravariant=False, bound=ParameterType) + + +class _FmtedKeysDescriptor: + """Enable the Parameters class' fmted_keys property to be typechecked correctly. + + See: https://github.com/microsoft/pyright/issues/3071#issuecomment-1043978070 + """ + + @overload + def __get__( # type: ignore + self, + instance: 'Parameters[Literal["CMR"]]', + owner: Any, + ) -> CMRParams: ... + + @overload + def __get__( + self, + instance: 'Parameters[Literal["required"]]', + owner: Any, + ) -> EGISpecificParams: ... + + @overload + def __get__( + self, + instance: 'Parameters[Literal["subset"]]', + owner: Any, + ) -> EGISpecificParamsSubset: ... + + def __get__( + self, + instance: "Parameters", + owner: Any, + ) -> "CMRParams | EGISpecificParams": # quoted: "|" needs 3.10+ at runtime + """ + Returns the dictionary of formatted keys associated with the + parameter object. + """ + return instance._fmted_keys + + # ---------------------------------------------------------------------- # DevNote: Currently, this class is not tested!! # DevGoal: this could be expanded, similar to the variables class, to provide users with valid options if need be # DevGoal: currently this does not do much by way of checking/formatting of other subsetting options (reprojection or formats) # it would be great to incorporate that so that people can't just feed any keywords in... 
-class Parameters: +class Parameters(Generic[T]): """ Build and update the parameter lists needed to submit a data order @@ -204,12 +254,19 @@ class Parameters: on the type of query. Must be one of ['search','download'] """ - def __init__(self, partype, values=None, reqtype=None): + fmted_keys = _FmtedKeysDescriptor() + + def __init__( + self, + partype: T, + values=None, + reqtype=None, + ): assert partype in [ "CMR", "required", "subset", - ], "You need to submit a valid parametery type." + ], "You need to submit a valid parameter type." self.partype = partype if partype == "required": @@ -240,15 +297,7 @@ def poss_keys(self): # return self._wanted - @property - def fmted_keys(self): - """ - Returns the dictionary of formatted keys associated with the - parameter object. - """ - return self._fmted_keys - - def _get_possible_keys(self): + def _get_possible_keys(self) -> dict[str, list[str]]: """ Use the parameter type to get a list of possible parameter keys. """ @@ -345,7 +394,7 @@ def check_values(self): else: return False - def build_params(self, **kwargs): + def build_params(self, **kwargs) -> None: """ Build the parameter dictionary of formatted key:value pairs for submission to NSIDC in the data request. 
@@ -393,8 +442,6 @@ def build_params(self, **kwargs): self._fmted_keys.update({key: kwargs[key]}) except KeyError: self._fmted_keys.update({key: kwargs["product"]}) - elif key == "version": - self._fmted_keys.update({key: kwargs["version"]}) elif key in kwargs: self._fmted_keys.update({key: kwargs[key]}) elif key in defaults: @@ -411,7 +458,10 @@ def build_params(self, **kwargs): for key in opt_keys: if key == "Coverage" and key in kwargs: - # DevGoal: make there be an option along the lines of Coverage=default, which will get the default variables for that product without the user having to input is2obj.build_wanted_wanted_var_list as their input value for using the Coverage kwarg + # DevGoal: make an option along the lines of Coverage=default, + # which will get the default variables for that product without + # the user having to input is2obj.build_wanted_wanted_var_list + # as their input value for using the Coverage kwarg self._fmted_keys.update( {key: _fmt_var_subset_list(kwargs[key])} ) @@ -438,3 +488,8 @@ def build_params(self, **kwargs): k = "Boundingshape" self._fmted_keys.update({k: kwargs["spatial_extent"]}) + + +CMRParameters = Parameters[Literal["CMR"]] +RequiredParameters = Parameters[Literal["required"]] +SubsetParameters = Parameters[Literal["subset"]] diff --git a/icepyx/core/auth.py b/icepyx/core/auth.py index 8e263e730..c5f39a470 100644 --- a/icepyx/core/auth.py +++ b/icepyx/core/auth.py @@ -55,7 +55,7 @@ def __init__(self, auth=None): self._s3login_credentials = None self._s3_initial_ts = None # timer for 1h expiration on s3 credentials - def __str__(self): + def __str__(self) -> str: if self.session: repr_string = "EarthdataAuth obj with session initialized" else: diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index 2c51e208b..b9f740eab 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import datetime import requests import time @@ -7,12 +9,15 @@ import 
numpy as np import os import pprint -from xml.etree import ElementTree as ET import zipfile +from requests.compat import unquote +from xml.etree import ElementTree as ET import icepyx.core.APIformatting as apifmt -from icepyx.core.auth import EarthdataAuthMixin import icepyx.core.exceptions +from icepyx.core.auth import EarthdataAuthMixin +from icepyx.core.types import CMRParams, EGISpecificParams +from icepyx.core.urls import DOWNLOAD_BASE_URL, ORDER_BASE_URL, GRANULE_SEARCH_BASE_URL def info(grans): @@ -168,14 +173,19 @@ def __init__( # ---------------------------------------------------------------------- # Methods - def get_avail(self, CMRparams, reqparams, cloud=False): + def get_avail( + self, + CMRparams: CMRParams | None, + reqparams: EGISpecificParams | None, + cloud=False, + ): """ Get a list of available granules for the query object's parameters. Generates the `avail` attribute of the granules object. Parameters ---------- - CMRparams : dictionary + CMRparams : Dictionary of properly formatted CMR search parameters. reqparams : dictionary Dictionary of properly formatted parameters required for searching, ordering, @@ -201,8 +211,6 @@ def get_avail(self, CMRparams, reqparams, cloud=False): # if not hasattr(self, 'avail'): self.avail = [] - granule_search_url = "https://cmr.earthdata.nasa.gov/search/granules" - headers = {"Accept": "application/json", "Client-Id": "icepyx"} # note we should also check for errors whenever we ping NSIDC-API - # make a function to check for errors @@ -220,7 +228,7 @@ def get_avail(self, CMRparams, reqparams, cloud=False): headers["CMR-Search-After"] = cmr_search_after response = requests.get( - granule_search_url, + GRANULE_SEARCH_BASE_URL, headers=headers, params=apifmt.to_string(params), ) @@ -261,13 +269,13 @@ def get_avail(self, CMRparams, reqparams, cloud=False): # DevGoal: add kwargs to allow subsetting and more control over request options. 
def place_order( self, - CMRparams, - reqparams, + CMRparams: CMRParams, + reqparams: EGISpecificParams, subsetparams, verbose, subset=True, geom_filepath=None, - ): # , **kwargs): + ): """ Place an order for the available granules for the query object. Adds the list of zipped files (orders) to the granules data object (which is @@ -276,11 +284,11 @@ def place_order( Parameters ---------- - CMRparams : dictionary + CMRparams : Dictionary of properly formatted CMR search parameters. - reqparams : dictionary + reqparams : Dictionary of properly formatted parameters required for searching, ordering, - or downloading from NSIDC. + or downloading from EGI. subsetparams : dictionary Dictionary of properly formatted subsetting parameters. An empty dictionary is passed as input here when subsetting is set to False in query methods. @@ -308,8 +316,6 @@ def place_order( query.Query.order_granules """ - base_url = "https://n5eil02u.ecs.nsidc.org/egi/request" - self.get_avail(CMRparams, reqparams) if subset is False: @@ -345,7 +351,7 @@ def place_order( ) request_params.update({"page_num": page_num}) - request = self.session.get(base_url, params=request_params) + request = self.session.get(ORDER_BASE_URL, params=request_params) # DevGoal: use the request response/number to do some error handling/ # give the user better messaging for failures @@ -361,7 +367,7 @@ def place_order( request.raise_for_status() esir_root = ET.fromstring(request.content) if verbose is True: - print("Order request URL: ", requests.utils.unquote(request.url)) + print("Order request URL: ", unquote(request.url)) print( "Order request response XML content: ", request.content.decode("utf-8"), @@ -377,7 +383,7 @@ def place_order( print("order ID: ", orderID) # Create status URL - statusURL = base_url + "/" + orderID + statusURL = ORDER_BASE_URL + "/" + orderID if verbose is True: print("status URL: ", statusURL) @@ -399,6 +405,7 @@ def place_order( print("Initial status of your order request at NSIDC is: ", 
status) # Continue loop while request is still processing + loop_root = None while status == "pending" or status == "processing": print( "Your order status is still ", @@ -422,6 +429,13 @@ def place_order( if status == "pending" or status == "processing": continue + if not isinstance(loop_root, ET.Element): + # The typechecker determined that loop_root could be unbound at this + # point. We know for sure this shouldn't be possible, though, because + # the while loop should run once. + # See: https://github.com/microsoft/pyright/discussions/2033 + raise RuntimeError("Programmer error!") + # Order can either complete, complete_with_errors, or fail: # Provide complete_with_errors error message: if status == "complete_with_errors" or status == "failed": @@ -522,7 +536,7 @@ def download(self, verbose, path, restart=False): i_order = self.orderIDs.index(order_start) + 1 for order in self.orderIDs[i_order:]: - downloadURL = "https://n5eil02u.ecs.nsidc.org/esir/" + order + ".zip" + downloadURL = f"{DOWNLOAD_BASE_URL}/{order}.zip" # DevGoal: get the download_url from the granules if verbose is True: diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index be3a3c8da..2d20019c3 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -7,6 +7,8 @@ import earthaccess +from icepyx.core.urls import COLLECTION_SEARCH_BASE_URL, EGI_BASE_URL + # ICESat-2 specific reference functions @@ -82,8 +84,7 @@ def about_product(prod): query.Query.product_all_info """ - cmr_collections_url = "https://cmr.earthdata.nasa.gov/search/collections.json" - response = requests.get(cmr_collections_url, params={"short_name": prod}) + response = requests.get(COLLECTION_SEARCH_BASE_URL, params={"short_name": prod}) results = json.loads(response.content) return results @@ -101,9 +102,7 @@ def _get_custom_options(session, product, version): "Don't forget to log in to Earthdata using query.earthdata_login()" ) - capability_url = ( - 
f"https://n5eil02u.ecs.nsidc.org/egi/capabilities/{product}.{version}.xml" - ) + capability_url = f"{EGI_BASE_URL}/capabilities/{product}.{version}.xml" response = session.get(capability_url) root = ET.fromstring(response.content) diff --git a/icepyx/core/query.py b/icepyx/core/query.py index ed241662b..fc2e96ff0 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1,6 +1,7 @@ import geopandas as gpd import matplotlib.pyplot as plt import pprint +from typing_extensions import Never import icepyx.core.APIformatting as apifmt from icepyx.core.auth import EarthdataAuthMixin @@ -11,6 +12,7 @@ import icepyx.core.spatial as spat import icepyx.core.temporal as tp import icepyx.core.validate_inputs as val +from icepyx.core.types import CMRParams, EGISpecificParams, EGISpecificParamsSubset from icepyx.core.variables import Variables as Variables from icepyx.core.visualization import Visualize @@ -392,6 +394,10 @@ class Query(GenQuery, EarthdataAuthMixin): GenQuery """ + _CMRparams: apifmt.CMRParameters + _reqparams: apifmt.RequiredParameters + _subsetparams: apifmt.SubsetParameters | None + # ---------------------------------------------------------------------- # Constructors @@ -531,7 +537,7 @@ def tracks(self): return sorted(set(self._tracks)) @property - def CMRparams(self): + def CMRparams(self) -> CMRParams: """ Display the CMR key:value pairs that will be submitted. It generates the dictionary if it does not already exist. @@ -572,7 +578,7 @@ def CMRparams(self): return self._CMRparams.fmted_keys @property - def reqparams(self): + def reqparams(self) -> EGISpecificParams: """ Display the required key:value pairs that will be submitted. It generates the dictionary if it does not already exist. @@ -598,7 +604,7 @@ def reqparams(self): # @property # DevQuestion: if I make this a property, I get a "dict" object is not callable # when I try to give input kwargs... what approach should I be taking? 
- def subsetparams(self, **kwargs): + def subsetparams(self, **kwargs) -> EGISpecificParamsSubset | dict[Never, Never]: """ Display the subsetting key:value pairs that will be submitted. It generates the dictionary if it does not already exist @@ -1000,7 +1006,7 @@ def order_granules(self, verbose=False, subset=True, email=False, **kwargs): if "email" in self._reqparams.fmted_keys or email is False: self._reqparams.build_params(**self._reqparams.fmted_keys) elif email is True: - user_profile = self.auth.get_user_profile() + user_profile = self.auth.get_user_profile() # pyright: ignore[reportAttributeAccessIssue] self._reqparams.build_params( **self._reqparams.fmted_keys, email=user_profile["email_address"] ) @@ -1134,14 +1140,14 @@ def visualize_spatial_extent( from shapely.geometry import Polygon # noqa: F401 import geoviews as gv - gv.extension("bokeh") + gv.extension("bokeh") # pyright: ignore[reportCallIssue] bbox_poly = gv.Path(gdf["geometry"]).opts(color="red", line_color="red") tile = gv.tile_sources.EsriImagery.opts(width=500, height=500) - return tile * bbox_poly + return tile * bbox_poly # pyright: ignore[reportOperatorIssue] except ImportError: - world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) + world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) # pyright: ignore[reportAttributeAccessIssue] f, ax = plt.subplots(1, figsize=(12, 6)) world.plot(ax=ax, facecolor="lightgray", edgecolor="gray") gdf.plot(ax=ax, color="#FF8C00", alpha=0.7) diff --git a/icepyx/core/types.py b/icepyx/core/types.py new file mode 100644 index 000000000..374e1da5a --- /dev/null +++ b/icepyx/core/types.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from typing import Literal, TypedDict, Union +from typing_extensions import NotRequired + + +CMRParamsBase = TypedDict( + "CMRParamsBase", + { + "temporal": NotRequired[str], + "options[readable_granule_name][pattern]": NotRequired[str], + "options[spatial][or]": NotRequired[str], + 
"readable_granule_name[]": NotRequired[str], + }, +) + + +class CMRParamsWithBbox(CMRParamsBase): + bounding_box: str + + +class CMRParamsWithPolygon(CMRParamsBase): + polygon: str + + +CMRParams = Union[CMRParamsWithBbox, CMRParamsWithPolygon] # "|" needs 3.10+ + + +class EGISpecificParamsBase(TypedDict): + """Common parameters for searching, ordering, or downloading from EGI. + + See: https://wiki.earthdata.nasa.gov/display/SDPSDOCS/EGI+Programmatic+Access+Documentation + + EGI shares parameters with CMR, so this data is used in conjunction with CMRParams + to build EGI requests. + + TODO: Validate more strongly (with Pydantic and its annotated types? + https://docs.pydantic.dev/latest/concepts/types/#composing-types-via-annotated): + + * short_name is `ATL##` (or Literal list of values?) + * version is 1-3 digits + * 0 < page_size <= 2000 + """ + + short_name: str # alias: "product" + version: str + page_size: int # default 2000 + page_num: int # default 0 + + +class EGISpecificParamsSearch(EGISpecificParamsBase): + """Parameters for searching through EGI.""" + + +class EGISpecificParamsOrder(EGISpecificParamsBase): + """Parameters for ordering through EGI.""" + + # TODO: Does this type need page_* attributes? + + +class EGISpecificParamsDownload(EGISpecificParamsBase): + """Parameters for ordering from EGI. + + TODO: Validate more strongly (with Pydantic?): page_num >=0. 
+ """ + + request_mode: Literal["sync", "async", "stream"] # default "async" + include_meta: Literal["Y", "N"] # default "Y" + client_string: Literal["icepyx"] # default "icepyx" + # token, email + + +class EGISpecificParamsSubset(EGISpecificParamsBase): + """Parameters for subsetting with EGI.""" + + +EGISpecificParams = Union[ + EGISpecificParamsSearch, EGISpecificParamsDownload, EGISpecificParamsSubset +] diff --git a/icepyx/core/urls.py b/icepyx/core/urls.py new file mode 100644 index 000000000..8c5bc325b --- /dev/null +++ b/icepyx/core/urls.py @@ -0,0 +1,10 @@ +from typing import Final + +CMR_BASE_URL: Final = "https://cmr.earthdata.nasa.gov" +GRANULE_SEARCH_BASE_URL: Final = f"{CMR_BASE_URL}/search/granules" +COLLECTION_SEARCH_BASE_URL: Final = f"{CMR_BASE_URL}/search/collections.json" + +EGI_BASE_URL: Final = "https://n5eil02u.ecs.nsidc.org/egi" +ORDER_BASE_URL: Final = f"{EGI_BASE_URL}/request" + +DOWNLOAD_BASE_URL: Final = "https://n5eil02u.ecs.nsidc.org/esir" diff --git a/icepyx/core/visualization.py b/icepyx/core/visualization.py index 5df884e2d..10355df0b 100644 --- a/icepyx/core/visualization.py +++ b/icepyx/core/visualization.py @@ -466,7 +466,7 @@ def parallel_request_OA(self) -> da.array: OA_data_da = da.concatenate(requested_OA_data, axis=0) return OA_data_da - def viz_elevation(self) -> (hv.DynamicMap, hv.Layout): + def viz_elevation(self) -> tuple[hv.DynamicMap, hv.Layout]: """ Visualize elevation requested from OpenAltimetry API using datashader based on cycles https://holoviz.org/tutorial/Large_Data.html diff --git a/pyproject.toml b/pyproject.toml index 66eb7b6f9..94861ff84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,3 +102,65 @@ ignore = [ # Ignore too many leading '#' for block comment "*/tests/*" = ["E266"] + + +[tool.pyright] +pythonVersion = "3.10" +typeCheckingMode = "standard" +include = [ + "icepyx", +] +exclude = [ + "**/__pycache__", + "icepyx/tests", +] +ignore = [ + "icepyx/quest/*", + "icepyx/core/APIformatting.py", + 
"icepyx/core/auth.py", + "icepyx/core/exceptions.py", + "icepyx/core/icesat2data.py", + "icepyx/core/is2ref.py", + "icepyx/core/read.py", + "icepyx/core/spatial.py", + "icepyx/core/temporal.py", + "icepyx/core/validate_inputs.py", + "icepyx/core/variables.py", + "icepyx/core/visualization.py", +] + + +[tool.mypy] +files = "icepyx/." +mypy_path = ["icepyx"] +# TODO: Enable ASAP: +check_untyped_defs = false +# TODO: Then enable: +disallow_untyped_defs = false +disallow_incomplete_defs = false +# TODO: Finally enable (and delete settings above; strict includes them): +strict = false + +[[tool.mypy.overrides]] +# Help us adopt mypy by skipping some modules entirely +# GOAL: Remove this whole section! +module = [ + "icepyx.core.read", + "icepyx.core.visualization", + "icepyx.quest.dataset_scripts.argo", + "icepyx.tests.test_Earthdata", +] +ignore_errors = true + +[[tool.mypy.overrides]] +module = [ + "dask.*", + "datashader.*", + "h5py.*", + "holoviews.*", + "geopandas.*", + "geoviews.*", + "pypistats.*", + "shapely.*", +] +ignore_missing_imports = true diff --git a/requirements-dev.txt b/requirements-dev.txt index 66106dab8..8a11cd9e4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,3 +3,8 @@ pypistats pytest>=4.6 pytest-cov responses +mypy +pandas-stubs +types-docutils +types-requests +types-tqdm