From fe504ec311e0eb20b988446748dd8b433e2559bd Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 16 Sep 2024 11:51:16 -0600 Subject: [PATCH 01/17] Use CMR and EDL "UAT" environments User Acceptance Testing --- icepyx/core/auth.py | 2 +- icepyx/core/urls.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/icepyx/core/auth.py b/icepyx/core/auth.py index 71b4393e5..d94802c64 100644 --- a/icepyx/core/auth.py +++ b/icepyx/core/auth.py @@ -68,7 +68,7 @@ def auth(self): """ # Only login the first time .auth is accessed if self._auth is None: - auth = earthaccess.login() + auth = earthaccess.login(system=earthaccess.system.UAT) # check for a valid auth response if auth.authenticated is False: raise AuthenticationError( diff --git a/icepyx/core/urls.py b/icepyx/core/urls.py index 8c5bc325b..70d10bc74 100644 --- a/icepyx/core/urls.py +++ b/icepyx/core/urls.py @@ -1,6 +1,6 @@ from typing import Final -CMR_BASE_URL: Final = "https://cmr.earthdata.nasa.gov" +CMR_BASE_URL: Final = "https://cmr.uat.earthdata.nasa.gov" GRANULE_SEARCH_BASE_URL: Final = f"{CMR_BASE_URL}/search/granules" COLLECTION_SEARCH_BASE_URL: Final = f"{CMR_BASE_URL}/search/collections.json" From b62764206a5ffc5013af84be11c167eb83ce83e9 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 16 Sep 2024 18:20:02 -0600 Subject: [PATCH 02/17] Add migration notes document --- HARMONY_MIGRATION_NOTES.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 HARMONY_MIGRATION_NOTES.md diff --git a/HARMONY_MIGRATION_NOTES.md b/HARMONY_MIGRATION_NOTES.md new file mode 100644 index 000000000..4f2e275bb --- /dev/null +++ b/HARMONY_MIGRATION_NOTES.md @@ -0,0 +1,25 @@ +## Assumptions that are different in Harmony + +* We can't use short name and version with Harmony like we do with ECS, we have to use + Concept ID (or DOI). We need to get this from CMR using short name and version. +* Variable subsetting won't be supported on day 1. 
+* All the ICESat-2 products we currently support will not be supported on day 1. + * + + +## Testing with Harmony + +Harmony is available for testing in the UAT environment. + +We will need to interact with everything (CMR, Earthdata Login, Harmony itself) in UAT +for icepyx to work correctly. + +* URLs *temporarily* modified for UAT. +* You need a separate Earthdata Login registration for UAT + (). +* The UAT NSIDC provider name is `NSIDC_UAT` + (). +* To test in UAT (i.e. access data in `NSIDC_CUAT` provider), your Earthdata Login + account must be on an access control list. Ask NSIDC operations for help. + * The code *temporarily* uses `$EDL_TOKEN` envvar to authenticate with CMR. Populate + this envvar with your Earthdata Login token. From 7ddb248986b13781b5ab9b771395382c9d678091 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 16 Sep 2024 18:23:32 -0600 Subject: [PATCH 03/17] Use NSIDC's UAT data provider for CMR --- icepyx/core/cmr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icepyx/core/cmr.py b/icepyx/core/cmr.py index 0c453c78b..2cd6ece8a 100644 --- a/icepyx/core/cmr.py +++ b/icepyx/core/cmr.py @@ -1,3 +1,4 @@ from typing import Final -CMR_PROVIDER: Final = "NSIDC_CPRD" +CMR_PROVIDER: Final = "NSIDC_CUAT" +# CMR_PROVIDER: Final = "NSIDC_CPRD" From bb5eabd0fe5754ab9b096bb3063b8073481bed2b Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 17 Sep 2024 17:14:25 -0600 Subject: [PATCH 04/17] Use envvar to pass EDL token to CMR to access restricted testing collections Removing the uat module should make it easy to find the rest of the code we need to clean up. 
--- icepyx/core/granules.py | 7 ++++++- icepyx/uat.py | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 icepyx/uat.py diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index 119e60bb3..5caaa68c6 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -25,6 +25,7 @@ EGIRequiredParamsSearch, ) from icepyx.core.urls import DOWNLOAD_BASE_URL, GRANULE_SEARCH_BASE_URL, ORDER_BASE_URL +from icepyx.uat import EDL_ACCESS_TOKEN def info(grans: list[dict]) -> dict[str, Union[int, float]]: @@ -228,7 +229,11 @@ def get_avail( # if not hasattr(self, 'avail'): self.avail = [] - headers = {"Accept": "application/json", "Client-Id": "icepyx"} + headers = { + "Accept": "application/json", + "Client-Id": "icepyx", + "Authorization": f"Bearer {EDL_ACCESS_TOKEN}", + } # note we should also check for errors whenever we ping NSIDC-API - # make a function to check for errors diff --git a/icepyx/uat.py b/icepyx/uat.py new file mode 100644 index 000000000..738cb0ade --- /dev/null +++ b/icepyx/uat.py @@ -0,0 +1,6 @@ +import os +from typing import Final + +# HACK: For testing with UAT, we need a token to authorize ourselves to see the +# private collections we want to test with. +EDL_ACCESS_TOKEN: Final = os.environ["EDL_TOKEN"] From 7f693e3d66d37a1b16f2d7d64da5ab56d6c14654 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 17 Sep 2024 17:14:43 -0600 Subject: [PATCH 05/17] Document open questions and another assumption --- HARMONY_MIGRATION_NOTES.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/HARMONY_MIGRATION_NOTES.md b/HARMONY_MIGRATION_NOTES.md index 4f2e275bb..6c531d6f7 100644 --- a/HARMONY_MIGRATION_NOTES.md +++ b/HARMONY_MIGRATION_NOTES.md @@ -5,6 +5,7 @@ * Variable subsetting won't be supported on day 1. * All the ICESat-2 products we currently support will not be supported on day 1. * +* ECS and CMR shared some parameters. This is not the case with Harmony. 
## Testing with Harmony @@ -23,3 +24,15 @@ for icepyx to work correctly. account must be on an access control list. Ask NSIDC operations for help. * The code *temporarily* uses `$EDL_TOKEN` envvar to authenticate with CMR. Populate this envvar with your Earthdata Login token. + + +## Open questions + +### Which API? + +Harmony has two APIs: + +* [OGC Environmental Data Retrieval API](https://harmony.earthdata.nasa.gov/docs/edr-api) +* [OGC Coverages API](https://harmony.earthdata.nasa.gov/docs/api/) + +Which should be used and when and why? From cf19a93805cda2ff36d217722d09c7152ccdb3e0 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 17 Sep 2024 20:25:39 -0600 Subject: [PATCH 06/17] Add concept_id cached property to Query class We need this to talk to Harmony -- Harmony doesn't support the product version as a query parameter, so we need to look up the real unique identifier in advance. --- icepyx/core/cmr.py | 25 +++++++++++++++++++++++++ icepyx/core/query.py | 8 ++++++++ 2 files changed, 33 insertions(+) diff --git a/icepyx/core/cmr.py b/icepyx/core/cmr.py index 2cd6ece8a..0b1beb700 100644 --- a/icepyx/core/cmr.py +++ b/icepyx/core/cmr.py @@ -1,4 +1,29 @@ from typing import Final +import requests + +from icepyx.core.urls import COLLECTION_SEARCH_BASE_URL +from icepyx.uat import EDL_ACCESS_TOKEN + CMR_PROVIDER: Final = "NSIDC_CUAT" # CMR_PROVIDER: Final = "NSIDC_CPRD" + + +def get_concept_id(*, product: str, version: str) -> str: + response = requests.get( + COLLECTION_SEARCH_BASE_URL, + headers={ + "Authorization": f"Bearer {EDL_ACCESS_TOKEN}", + }, + params={ + "short_name": product, + "version": version, + "provider": CMR_PROVIDER, + }, + ) + metadata = response.json()["feed"]["entry"] + + if len(metadata) != 1: + raise RuntimeError(f"Expected 1 result from CMR, received {metadata}") + + return metadata[0]["id"] diff --git a/icepyx/core/query.py b/icepyx/core/query.py index ef9458279..94cafd16f 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py 
@@ -10,6 +10,7 @@ import icepyx.core.APIformatting as apifmt from icepyx.core.auth import EarthdataAuthMixin +from icepyx.core.cmr import get_concept_id from icepyx.core.exceptions import DeprecationError import icepyx.core.granules as granules from icepyx.core.granules import Granules @@ -464,6 +465,13 @@ def __str__(self) -> str: self.spatial_extent, self.dates, self.product, self.product_version ) + @cached_property + def concept_id(self) -> str: + return get_concept_id( + product=self.product, + version=self.product_version, + ) + @property def dataset(self) -> Never: """ From 6769f01848521e5a98ea861676e6483fcada6aa2 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 17 Sep 2024 20:26:50 -0600 Subject: [PATCH 07/17] Remove redundant type info in docstring --- icepyx/core/query.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 94cafd16f..d6894587b 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -988,16 +988,16 @@ def order_granules( Parameters ---------- - verbose : boolean, default False + verbose : Print out all feedback available from the order process. Progress information is automatically printed regardless of the value of verbose. - subset : boolean, default True + subset : Apply subsetting to the data order from the NSIDC, returning only data that meets the subset parameters. Spatial and temporal subsetting based on the input parameters happens by default when subset=True, but additional subsetting options are available. Spatial subsetting returns all data that are within the area of interest (but not complete granules. This eliminates false-positive granules returned by the metadata-level search) - email: boolean, default False + email : Have NSIDC auto-send order status email updates to indicate order status as pending/completed. The emails are sent to the account associated with your Earthdata account. 
**kwargs : key-value pairs From 4d0e641763a840704ec72c2234ec8fd6913df2e0 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 17 Sep 2024 20:27:22 -0600 Subject: [PATCH 08/17] Remove OBE docstring fragments --- icepyx/core/granules.py | 4 ---- icepyx/core/query.py | 4 ---- 2 files changed, 8 deletions(-) diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index 5caaa68c6..18497602c 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -528,10 +528,6 @@ def download(self, verbose, path, restart=False): -------- query.Query.download_granules """ - """ - extract : boolean, default False - Unzip the downloaded granules. - """ # DevNote: this will replace any existing orderIDs with the saved list # (could create confusion depending on whether download was interrupted or kernel restarted) diff --git a/icepyx/core/query.py b/icepyx/core/query.py index d6894587b..ced5e0de0 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1117,10 +1117,6 @@ def download_granules( See Also -------- granules.download - """ - """ - extract : boolean, default False - Unzip the downloaded granules. Examples -------- From 609a07cef0d3b1f1b0971d9bb57d664fa27d0eb5 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 17 Sep 2024 20:36:58 -0600 Subject: [PATCH 09/17] Remove 1.x deprecation warning --- icepyx/__init__.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/icepyx/__init__.py b/icepyx/__init__.py index b0cd8095d..a9d61834b 100644 --- a/icepyx/__init__.py +++ b/icepyx/__init__.py @@ -1,17 +1,3 @@ -from warnings import warn - -deprecation_msg = """icepyx v1.x is being deprecated; the back-end systems on which it relies -will be shut down as of late 2024. At that time, upgrade to icepyx v2.x, which uses the -new NASA Harmony back-end, will be required. Please see - for more -information! -""" -# IMPORTANT: This is being done before the other icepyx imports because the imported -# code changes warning filters. 
If this is done after the imports, the warning won't -# work. -warn(deprecation_msg, FutureWarning, stacklevel=2) - - from _icepyx_version import version as __version__ from icepyx.core.query import GenQuery, Query From e39586c83d0a883db2152e7e2cf6ed13080bdc4d Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Wed, 25 Sep 2024 13:53:19 -0600 Subject: [PATCH 10/17] Add note about Harmony's ICESat-2 live date --- HARMONY_MIGRATION_NOTES.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/HARMONY_MIGRATION_NOTES.md b/HARMONY_MIGRATION_NOTES.md index 6c531d6f7..d18b801e4 100644 --- a/HARMONY_MIGRATION_NOTES.md +++ b/HARMONY_MIGRATION_NOTES.md @@ -12,6 +12,12 @@ Harmony is available for testing in the UAT environment. +> [!NOTE] +> ICESat-2 products will be available in production in early October 2024. If you're +> reading this after that time, please talk to Amy Steiker about Harmony's current +> status before investing time setting up to test with UAT. If prod is available, test +> with prod. + We will need to interact with everything (CMR, Earthdata Login, Harmony itself) in UAT for icepyx to work correctly. From 6c7c862056ed0e8589eacac333e3df4e9195e3c2 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Wed, 25 Sep 2024 13:57:14 -0600 Subject: [PATCH 11/17] Answer open question about multiple Harmony APIs --- HARMONY_MIGRATION_NOTES.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/HARMONY_MIGRATION_NOTES.md b/HARMONY_MIGRATION_NOTES.md index d18b801e4..4e4b6f5af 100644 --- a/HARMONY_MIGRATION_NOTES.md +++ b/HARMONY_MIGRATION_NOTES.md @@ -32,7 +32,7 @@ for icepyx to work correctly. this envvar with your Earthdata Login token. -## Open questions +## FAQ ### Which API? @@ -42,3 +42,18 @@ Harmony has two APIs: * [OGC Coverages API](https://harmony.earthdata.nasa.gov/docs/api/) Which should be used and when and why? + + +#### "Answer" + +Use the [OGC Coverages API](https://harmony.earthdata.nasa.gov/docs/api/)! 
+ +> My take is that we ought to focus on the Coverages API for ICESat-2, since we aren’t +> making use of the new parameters. And this is what they primarily support. But I don’t +> have a good handle on whether we ought to pursue the EDR API at any point. +> +> - Amy Steiker + +See this thread on EOSDIS Slack for more details: + + From 05c7edeca93924f39abc41ce527967c7e2f425e1 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Wed, 25 Sep 2024 18:36:34 -0600 Subject: [PATCH 12/17] WIP Don't trust anything in here --- icepyx/core/cmr.py | 3 + icepyx/core/exceptions.py | 8 +++ icepyx/core/granules.py | 2 + icepyx/core/harmony.py | 13 ++++ icepyx/core/is2ref.py | 15 +++-- icepyx/core/query.py | 9 ++- icepyx/core/types.py | 111 ---------------------------------- icepyx/core/types/__init__.py | 29 +++++++++ icepyx/core/types/api.py | 28 +++++++++ icepyx/core/urls.py | 10 +-- requirements.txt | 1 + 11 files changed, 109 insertions(+), 120 deletions(-) create mode 100644 icepyx/core/harmony.py delete mode 100644 icepyx/core/types.py create mode 100644 icepyx/core/types/__init__.py create mode 100644 icepyx/core/types/api.py diff --git a/icepyx/core/cmr.py b/icepyx/core/cmr.py index 0b1beb700..2e33c6e1f 100644 --- a/icepyx/core/cmr.py +++ b/icepyx/core/cmr.py @@ -27,3 +27,6 @@ def get_concept_id(*, product: str, version: str) -> str: raise RuntimeError(f"Expected 1 result from CMR, received {metadata}") return metadata[0]["id"] + + +# TODO: Extract CMR collection query from granules.py diff --git a/icepyx/core/exceptions.py b/icepyx/core/exceptions.py index 085fed8c9..b4598dc2c 100644 --- a/icepyx/core/exceptions.py +++ b/icepyx/core/exceptions.py @@ -53,3 +53,11 @@ class ExhaustiveTypeGuardException(TypeGuardException): Used exclusively in cases where the typechecker needs a typeguard to tell it that a check is exhaustive. """ + + +class RefactoringException(Exception): + def __str__(self): + return ( + "This code is being refactored." 
+ " The code after this exception is expected to require major changes." + ) diff --git a/icepyx/core/granules.py b/icepyx/core/granules.py index 18497602c..d6a519048 100644 --- a/icepyx/core/granules.py +++ b/icepyx/core/granules.py @@ -337,6 +337,7 @@ def place_order( -------- query.Query.order_granules """ + raise icepyx.core.exceptions.RefactoringException self.get_avail(CMRparams, reqparams) @@ -371,6 +372,7 @@ def place_order( total_pages, " is submitting to NSIDC", ) + breakpoint() request_params.update({"page_num": page_num}) request = self.session.get(ORDER_BASE_URL, params=request_params) diff --git a/icepyx/core/harmony.py b/icepyx/core/harmony.py new file mode 100644 index 000000000..8d03ecc5d --- /dev/null +++ b/icepyx/core/harmony.py @@ -0,0 +1,13 @@ +from typing import Any + +import requests + +from icepyx.core.urls import CAPABILITIES_BASE_URL + + +def get_capabilities(concept_id: str) -> dict[str, Any]: + response = requests.get( + CAPABILITIES_BASE_URL, + params={"collectionId": concept_id}, + ) + return response.json() diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index ac080dd4f..b598ede26 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -8,7 +8,8 @@ import numpy as np import requests -from icepyx.core.urls import COLLECTION_SEARCH_BASE_URL, EGI_BASE_URL +from icepyx.core.exceptions import RefactoringException +from icepyx.core.urls import COLLECTION_SEARCH_BASE_URL # ICESat-2 specific reference functions @@ -92,9 +93,9 @@ def about_product(prod: str) -> dict: # DevGoal: use a mock of this output to test later functions, such as displaying options and widgets, etc. # options to get customization options for ICESat-2 data (though could be used generally) def _get_custom_options(session, product, version): - """ - Get lists of what customization options are available for the product from NSIDC. 
- """ + """Get lists of available customization options from Harmony.""" + raise RefactoringException + cust_options = {} if session is None: @@ -102,6 +103,11 @@ def _get_custom_options(session, product, version): "Don't forget to log in to Earthdata using query.earthdata_login()" ) + # concept_id_query_url = f"{COLLECTION_SEARCH_BASE_URL}?short_name={product}&version={version}" + # concept_id = session.get(concept_id_query_url).json()["feed"]["entry"][-1]["id"] + # capability_url = f"{CAPABILITIES_BASE_URL}?collectionId={concept_id}" + # response_json = session.get(capability_url).json() + capability_url = f"{EGI_BASE_URL}/capabilities/{product}.{version}.xml" response = session.get(capability_url) root = ET.fromstring(response.content) @@ -111,6 +117,7 @@ def _get_custom_options(session, product, version): cust_options.update({"options": subagent}) # reformatting + # cust_options.update({"fileformats": response_json["outputFormats"]}) formats = [Format.attrib for Format in root.iter("Format")] format_vals = [formats[i]["value"] for i in range(len(formats))] try: diff --git a/icepyx/core/query.py b/icepyx/core/query.py index ced5e0de0..91f458b39 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -11,7 +11,7 @@ import icepyx.core.APIformatting as apifmt from icepyx.core.auth import EarthdataAuthMixin from icepyx.core.cmr import get_concept_id -from icepyx.core.exceptions import DeprecationError +from icepyx.core.exceptions import DeprecationError, RefactoringException import icepyx.core.granules as granules from icepyx.core.granules import Granules import icepyx.core.is2ref as is2ref @@ -613,6 +613,7 @@ def reqparams(self) -> EGIRequiredParams: >>> reg_a.reqparams # doctest: +SKIP {'short_name': 'ATL06', 'version': '006', 'page_size': 2000, 'page_num': 1, 'request_mode': 'async', 'include_meta': 'Y', 'client_string': 'icepyx'} """ + raise RefactoringException if not hasattr(self, "_reqparams"): self._reqparams = apifmt.Parameters("required", 
reqtype="search") @@ -649,6 +650,8 @@ def subsetparams(self, **kwargs) -> Union[EGIParamsSubset, dict[Never, Never]]: {'time': '2019-02-20T00:00:00,2019-02-28T23:59:59', 'bbox': '-55.0,68.0,-48.0,71.0'} """ + raise RefactoringException + if not hasattr(self, "_subsetparams"): self._subsetparams = apifmt.Parameters("subset") @@ -1024,6 +1027,8 @@ def order_granules( . Retry request status is: complete """ + breakpoint() + raise RefactoringException if not hasattr(self, "reqparams"): self.reqparams @@ -1138,6 +1143,8 @@ def download_granules( or len(self.granules.orderIDs) == 0 ): self.order_granules(verbose=verbose, subset=subset, **kwargs) + breakpoint() + raise RefactoringException self.granules.download(verbose, path, restart=restart) diff --git a/icepyx/core/types.py b/icepyx/core/types.py deleted file mode 100644 index e85f8696f..000000000 --- a/icepyx/core/types.py +++ /dev/null @@ -1,111 +0,0 @@ -from __future__ import annotations - -from typing import Literal, TypedDict, Union - -from typing_extensions import NotRequired - -ICESat2ProductShortName = Literal[ - "ATL01", - "ATL02", - "ATL03", - "ATL04", - "ATL06", - "ATL07", - "ATL07QL", - "ATL08", - "ATL09", - "ATL09QL", - "ATL10", - "ATL11", - "ATL12", - "ATL13", - "ATL14", - "ATL15", - "ATL16", - "ATL17", - "ATL19", - "ATL20", - "ATL21", - "ATL23", -] - -CMRParamsBase = TypedDict( - "CMRParamsBase", - { - "temporal": NotRequired[str], - "options[readable_granule_name][pattern]": NotRequired[str], - "options[spatial][or]": NotRequired[str], - "readable_granule_name[]": NotRequired[str], - }, -) - - -class CMRParamsWithBbox(CMRParamsBase): - bounding_box: str - - -class CMRParamsWithPolygon(CMRParamsBase): - polygon: str - - -CMRParams = Union[CMRParamsWithBbox, CMRParamsWithPolygon] - - -class EGIRequiredParamsBase(TypedDict): - """Common parameters for searching, ordering, or downloading from EGI. 
- - See: https://wiki.earthdata.nasa.gov/display/SDPSDOCS/EGI+Programmatic+Access+Documentation - - EGI shares parameters with CMR, so this data is used in conjunction with CMRParams - to build EGI requests. - - TODO: Validate more strongly (with Pydantic and its annotated types? - https://docs.pydantic.dev/latest/concepts/types/#composing-types-via-annotated): - - * version is 3 digits - * 0 < page_size <= 2000 - """ - - short_name: ICESat2ProductShortName # alias: "product" - version: str - page_size: int # default 2000 - page_num: int # default 0 - - -class EGIRequiredParamsSearch(EGIRequiredParamsBase): - """Parameters for interacting with EGI.""" - - -class EGIRequiredParamsDownload(EGIRequiredParamsBase): - """Parameters for ordering from EGI. - - TODO: Validate more strongly (with Pydantic?): page_num >=0. - """ - - request_mode: Literal["sync", "async", "stream"] # default "async" - include_meta: Literal["Y", "N"] # default "Y" - client_string: Literal["icepyx"] # default "icepyx" - # token, email - - -class EGIParamsSubsetBase(TypedDict): - """Parameters for subsetting with EGI.""" - - time: NotRequired[str] - format: NotRequired[str] - projection: NotRequired[str] - projection_parameters: NotRequired[str] - Coverage: NotRequired[str] - - -class EGIParamsSubsetBbox(EGIParamsSubsetBase): - bbox: NotRequired[str] - - -class EGIParamsSubsetBoundingShape(EGIParamsSubsetBase): - Boundingshape: NotRequired[str] - - -EGIParamsSubset = Union[EGIParamsSubsetBbox, EGIParamsSubsetBoundingShape] - -EGIRequiredParams = Union[EGIRequiredParamsSearch, EGIRequiredParamsDownload] diff --git a/icepyx/core/types/__init__.py b/icepyx/core/types/__init__.py new file mode 100644 index 000000000..8e85cfea5 --- /dev/null +++ b/icepyx/core/types/__init__.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Literal + +ICESat2ProductShortName = Literal[ + "ATL01", + "ATL02", + "ATL03", + "ATL04", + "ATL06", + "ATL07", + "ATL07QL", + "ATL08", + "ATL09", + 
"ATL09QL", + "ATL10", + "ATL11", + "ATL12", + "ATL13", + "ATL14", + "ATL15", + "ATL16", + "ATL17", + "ATL19", + "ATL20", + "ATL21", + "ATL23", +] + diff --git a/icepyx/core/types/api.py b/icepyx/core/types/api.py new file mode 100644 index 000000000..b29ba8fb4 --- /dev/null +++ b/icepyx/core/types/api.py @@ -0,0 +1,28 @@ +from typing import Literal, TypedDict, Union + +from typing_extensions import NotRequired +from pydantic import BaseModel + +CMRParamsBase = TypedDict( + "CMRParamsBase", + { + "temporal": NotRequired[str], + "options[readable_granule_name][pattern]": NotRequired[str], + "options[spatial][or]": NotRequired[str], + "readable_granule_name[]": NotRequired[str], + }, +) + + +class CMRParamsWithBbox(CMRParamsBase): + bounding_box: str + + +class CMRParamsWithPolygon(CMRParamsBase): + polygon: str + + +CMRParams = Union[CMRParamsWithBbox, CMRParamsWithPolygon] + + +class HarmonyCoverageAPIParamsBase(BaseModel): diff --git a/icepyx/core/urls.py b/icepyx/core/urls.py index 70d10bc74..88b821b4f 100644 --- a/icepyx/core/urls.py +++ b/icepyx/core/urls.py @@ -1,10 +1,12 @@ from typing import Final +# TODO: Not UAT! CMR_BASE_URL: Final = "https://cmr.uat.earthdata.nasa.gov" GRANULE_SEARCH_BASE_URL: Final = f"{CMR_BASE_URL}/search/granules" COLLECTION_SEARCH_BASE_URL: Final = f"{CMR_BASE_URL}/search/collections.json" -EGI_BASE_URL: Final = "https://n5eil02u.ecs.nsidc.org/egi" -ORDER_BASE_URL: Final = f"{EGI_BASE_URL}/request" - -DOWNLOAD_BASE_URL: Final = "https://n5eil02u.ecs.nsidc.org/esir" +# TODO: Not UAT! +HARMONY_BASE_URL: Final = "https://harmony.uat.earthdata.nasa.gov" +CAPABILITIES_BASE_URL: Final = f"{HARMONY_BASE_URL}/capabilities" +ORDER_BASE_URL: Final = f"{HARMONY_BASE_URL}/...?" +DOWNLOAD_BASE_URL: Final = f"{HARMONY_BASE_URL}/...?" 
diff --git a/requirements.txt b/requirements.txt index 6a9659270..1ff3a8824 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ holoviews hvplot matplotlib numpy +pydantic>=2.9.2 requests s3fs shapely From 44fa0d17f97d444c3d7d5a3c4dd0ea01bef0616b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 00:36:57 +0000 Subject: [PATCH 13/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- icepyx/core/types/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/icepyx/core/types/__init__.py b/icepyx/core/types/__init__.py index 8e85cfea5..335474ea9 100644 --- a/icepyx/core/types/__init__.py +++ b/icepyx/core/types/__init__.py @@ -26,4 +26,3 @@ "ATL21", "ATL23", ] - From 7cc708593bc174673d763d23ea02e745c6e3e11d Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Thu, 26 Sep 2024 10:08:31 -0600 Subject: [PATCH 14/17] Add a quick start to harmony migration --- HARMONY_MIGRATION_NOTES.md | 101 +++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/HARMONY_MIGRATION_NOTES.md b/HARMONY_MIGRATION_NOTES.md index 4e4b6f5af..42d2b8331 100644 --- a/HARMONY_MIGRATION_NOTES.md +++ b/HARMONY_MIGRATION_NOTES.md @@ -8,6 +8,107 @@ * ECS and CMR shared some parameters. This is not the case with Harmony. +## Getting started on development + +### Work so far + +Work in progress is on the `harmony` branch. This depends on the `low-hanging-refactors` +branch being merged. A PR is open. + +> [!IMPORTANT] +> Several commits establish communication with UAT instead of production. They will need +> to be reverted once Harmony is available in prod. + +In addition to this work, refactoring, type checking, and type annotations have been +added to the codebase to support the migration to Harmony. 
+ + +### Familiarize with Harmony + +* Check out this amazing notebook provided by Amy Steiker and Patrick Quinn: + +* Review the interactive API documentation: + (remember, remove UAT from URL if + Harmony is live with ICESat-2 products in early October 2024) + + +### Getting started replacing ECS with Harmony + +1. Find the `WIP` commit (`ac916d6`) and use `git reset` to restore the changes into the + working tree. There are several breakpoints set, as well as an artificially + introduced exception class to help trace and narrow the code paths during + refactoring. +2. Exercise a specific code path. For example: + + ```python + import icepyx as ipx + import datetime as dt + + q = ipx.Query( + product="ATL06", + version="006", + spatial_extent=[-90, 68, 48, 90], + # "./doc/source/example_notebooks/supporting_files/simple_test_poly.gpkg", + date_range={ + "start_date": dt.datetime(2018, 10, 10, 0, 10, 0), + "end_date": dt.datetime(2018, 10, 18, 14, 45, 30), + # "end_date": '2019-02-28', + } + ) + + q.download_granules("/tmp/icepyx") + ``` + +3. Identify the first query to ECS. Queries, except the capabilities query in + `is2ref.py`, are formed from constants in `urls.py`. Continue this practice. Harmony + URLs in this file are placeholders. +4. Determine an equivalent Harmony query. The Harmony Coverages API has an equivalent to + the capabilities query in `is2ref.py`, for example. +5. Raise `RefactoringException` at the top of any functions or methods which currently + speak to ECS. This will help us find and delete those "dead code" functions later, + and prevent them from being inadvertently executed. +6. Write new functions or methods which speak to Harmony instead. It's important to + encapsulate the communication with the Harmony API in a single function. This may + mean replacing one function with several smaller functions during refactoring. +7. Maintain the high standard of documentation in the code. Include examples as doctests + in the new functions. 
Use Numpy style docstrings. **DO NOT** include type information + in docstrings -- write type annotations instead. They will be automatically + documented by the documentation generator. +8. Repeat from step 3 for the next EGI query. + +### Watch out for broken assumptions + +It's important to note that two major assumptions will require significant refactoring. +The type annotations will help with this process! + +1. Broken assumption: "CMR and EGI share parameter sets". My mental model looks like: + * Current: User passes in parameters to `Query(...)`. Those params are used to generate + separate "CMR parameters" and "reqparams". "CMRparams" are spatial and temporal + parameters compatible with CMR. I'm not sure about the naming of "reqparams", but I + think of them as the EGI parameters (which may include more than the user passed, like + `page_size`) _minus_ the CMR spatial and temporal parameters. The actual queries + submitted to CMR and EGI are based on those generated parameter sets. + * Future: In Harmony-land, the shared parameter assumption is broken. CMR and Harmony's + Coverages API have completely different parameter sets. The code can be drastically simplified: + User passes in parameters to `Query(...)`. Those params are used directly to generate + both CMR and Harmony queries without an intervening layer. E.g. +2. Broken assumption: "We can query with only short_name and version number". Harmony + requires a unique identifier (concept ID or DOI). E.g.: + + (NOTE: UAT query using a collection from a test provider; we should be using + `NSIDC_CUAT` provider in real UAT queries and `NSIDC_CPRD` for real prod queries). + Since we want the user to be able to provide short_name and version, implementing the + concept ID as a `@cached_property` on `Query` which asks CMR for the concept ID makes + sense to me.
+ + +### Don't forget to enhance along the way + +* Now that we're ripping things apart and changing parameters, I think it's important to + replace the TypedDict annotations we're using with Pydantic models. This will enable us + to better encapsulate validation code that's currently spread around. + + ## Testing with Harmony Harmony is available for testing in the UAT environment. From 239eaa073b2093ba26bece70382f376bb5330768 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Fri, 27 Sep 2024 09:46:15 -0600 Subject: [PATCH 15/17] Mention other ongoing icepyx efforts --- HARMONY_MIGRATION_NOTES.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/HARMONY_MIGRATION_NOTES.md b/HARMONY_MIGRATION_NOTES.md index 42d2b8331..ab60c0089 100644 --- a/HARMONY_MIGRATION_NOTES.md +++ b/HARMONY_MIGRATION_NOTES.md @@ -133,6 +133,19 @@ for icepyx to work correctly. this envvar with your Earthdata Login token. +## Integrating with other ongoing Icepyx work + +Harmony is a major breaking change, so we'll be releasing it in Icepyx v2. + +We know the community wants to break the API in some other ways, so we want to include those in v2 as well! + +* Some of Icepyx's Query functionality is already served by earthaccess; refactor or replace the `Query` class? +* ? + +Jessica is currently determining who can help work on these changes, and what that looks like. *If you, the +Harmony/ECS migration developer, identify opportunities to easily replace portions of Icepyx with _earthaccess_ +or other libraries, take advantage of that opportunity. + ## FAQ ### Which API? 
From ba1b00753fd3357731aa6b7c23e267ed40d8cfdb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Sep 2024 15:46:22 +0000 Subject: [PATCH 16/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- HARMONY_MIGRATION_NOTES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HARMONY_MIGRATION_NOTES.md b/HARMONY_MIGRATION_NOTES.md index ab60c0089..50e964a2b 100644 --- a/HARMONY_MIGRATION_NOTES.md +++ b/HARMONY_MIGRATION_NOTES.md @@ -135,14 +135,14 @@ for icepyx to work correctly. ## Integrating with other ongoing Icepyx work -Harmony is a major breaking change, so we'll be releasing it in Icepyx v2. +Harmony is a major breaking change, so we'll be releasing it in Icepyx v2. We know the community wants to break the API in some other ways, so we want to include those in v2 as well! * Some of Icepyx's Query functionality is already served by earthaccess; refactor or replace the `Query` class? * ? -Jessica is currently determining who can help work on these changes, and what that looks like. *If you, the +Jessica is currently determining who can help work on these changes, and what that looks like. *If you, the Harmony/ECS migration developer, identify opportunities to easily replace portions of Icepyx with _earthaccess_ or other libraries, take advantage of that opportunity. 
From 75fc5e7a469746419e1746674ae06aaf5e2e09c2 Mon Sep 17 00:00:00 2001 From: Trey Stafford Date: Thu, 31 Oct 2024 13:31:20 -0600 Subject: [PATCH 17/17] Remove references to UAT --- HARMONY_MIGRATION_NOTES.md | 36 +++--------------------------------- icepyx/core/auth.py | 2 +- icepyx/core/cmr.py | 7 +------ icepyx/core/urls.py | 8 ++++---- icepyx/uat.py | 6 ------ 5 files changed, 9 insertions(+), 50 deletions(-) delete mode 100644 icepyx/uat.py diff --git a/HARMONY_MIGRATION_NOTES.md b/HARMONY_MIGRATION_NOTES.md index 50e964a2b..3519cd93f 100644 --- a/HARMONY_MIGRATION_NOTES.md +++ b/HARMONY_MIGRATION_NOTES.md @@ -15,10 +15,6 @@ Work in progress is on the `harmony` branch. This depends on the `low-hanging-refactors` branch being merged. A PR is open. -> [!IMPORTANT] -> Several commits establish communication with UAT instead of production. They will need -> to be reverted once Harmony is available in prod. - In addition to this work, refactoring, type checking, and type annotations have been added to the codebase to support the migration to Harmony. @@ -28,8 +24,7 @@ added to the codebase to support the migration to Harmony. * Check out this amazing notebook provided by Amy Steiker and Patrick Quinn: * Review the interactive API documentation: - (remember, remove UAT from URL if - Harmony is live with ICESat-2 products in early October 2024) + ### Getting started replacing ECS with Harmony @@ -94,9 +89,8 @@ The type annotations will help with this process! both CMR and Harmony queries without an intervening layer. E.g. 2. Broken assumption: "We can query with only short_name and version number". Harmony requires a unique identifier (concept ID or DOI). E.g.: - - (NOTE: UAT query using a collection from a test provider; we should be using - `NSIDC_CUAT` provider in real UAT queries and `NSIDC_CPRD` for real prod queries). + + . 
Since we want the user to be able to provide short_name and version, implementing the concept ID as a `@cached_property` on `Query` which asks CMR for the concept ID makes sense to me. @@ -109,30 +103,6 @@ The type annotations will help with this process! to better encapsulate validation code that's currently spread around. -## Testing with Harmony - -Harmony is available for testing in the UAT environment. - -> [!NOTE] -> ICESat-2 products will be available in production in early October 2024. If you're -> reading this after that time, please talk to Amy Steiker about Harmony's current -> status before investing time setting up to test with UAT. If prod is available, test -> with prod. - -We will need to interact with everything (CMR, Earthdata Login, Harmony itself) in UAT -for icepyx to work correctly. - -* URLs *temporarily* modified for UAT. -* You need a separate Earthdata Login registration for UAT - (). -* The UAT NSIDC provider name is `NSIDC_UAT` - (). -* To test in UAT (i.e. access data in `NSIDC_CUAT` provider), your Earthdata Login - account must be on an access control list. Ask NSIDC operations for help. - * The code *temporarily* uses `$EDL_TOKEN` envvar to authenticate with CMR. Populate - this envvar with your Earthdata Login token. - - ## Integrating with other ongoing Icepyx work Harmony is a major breaking change, so we'll be releasing it in Icepyx v2. 
diff --git a/icepyx/core/auth.py b/icepyx/core/auth.py index d94802c64..71b4393e5 100644 --- a/icepyx/core/auth.py +++ b/icepyx/core/auth.py @@ -68,7 +68,7 @@ def auth(self): """ # Only login the first time .auth is accessed if self._auth is None: - auth = earthaccess.login(system=earthaccess.system.UAT) + auth = earthaccess.login() # check for a valid auth response if auth.authenticated is False: raise AuthenticationError( diff --git a/icepyx/core/cmr.py b/icepyx/core/cmr.py index 2e33c6e1f..d50b871a7 100644 --- a/icepyx/core/cmr.py +++ b/icepyx/core/cmr.py @@ -3,18 +3,13 @@ import requests from icepyx.core.urls import COLLECTION_SEARCH_BASE_URL -from icepyx.uat import EDL_ACCESS_TOKEN -CMR_PROVIDER: Final = "NSIDC_CUAT" -# CMR_PROVIDER: Final = "NSIDC_CPRD" +CMR_PROVIDER: Final = "NSIDC_CPRD" def get_concept_id(*, product: str, version: str) -> str: response = requests.get( COLLECTION_SEARCH_BASE_URL, - headers={ - "Authorization": f"Bearer {EDL_ACCESS_TOKEN}", - }, params={ "short_name": product, "version": version, diff --git a/icepyx/core/urls.py b/icepyx/core/urls.py index 88b821b4f..643525cc9 100644 --- a/icepyx/core/urls.py +++ b/icepyx/core/urls.py @@ -1,12 +1,12 @@ from typing import Final -# TODO: Not UAT! -CMR_BASE_URL: Final = "https://cmr.uat.earthdata.nasa.gov" +CMR_BASE_URL: Final = "https://cmr.earthdata.nasa.gov" GRANULE_SEARCH_BASE_URL: Final = f"{CMR_BASE_URL}/search/granules" COLLECTION_SEARCH_BASE_URL: Final = f"{CMR_BASE_URL}/search/collections.json" -# TODO: Not UAT! -HARMONY_BASE_URL: Final = "https://harmony.uat.earthdata.nasa.gov" +# TODO: the harmony base url and capabilities URL will be handled by +# `harmony-py`: remove these constants. +HARMONY_BASE_URL: Final = "https://harmony.earthdata.nasa.gov" CAPABILITIES_BASE_URL: Final = f"{HARMONY_BASE_URL}/capabilities" ORDER_BASE_URL: Final = f"{HARMONY_BASE_URL}/...?" DOWNLOAD_BASE_URL: Final = f"{HARMONY_BASE_URL}/...?" 
diff --git a/icepyx/uat.py b/icepyx/uat.py deleted file mode 100644 index 738cb0ade..000000000 --- a/icepyx/uat.py +++ /dev/null @@ -1,6 +0,0 @@ -import os -from typing import Final - -# HACK: For testing with UAT, we need a token to authorize ourselves to see the -# private collections we want to test with. -EDL_ACCESS_TOKEN: Final = os.environ["EDL_TOKEN"]