From f79a005b0a75e0e1261111a72589a4d678f3c819 Mon Sep 17 00:00:00 2001 From: Daniel Kaufman <114174502+danielfromearth@users.noreply.github.com> Date: Fri, 10 May 2024 12:53:39 -0400 Subject: [PATCH] Issue 421 - option to use Earthdata User Acceptance Testing (UAT) system (#426) Co-authored-by: Matt Fisher Co-authored-by: Chuck Daniels Co-authored-by: Daniel Kaufman Fixes #421 --- .pre-commit-config.yaml | 4 +- CHANGELOG.md | 5 +- docs/howto/authenticate.md | 12 +++++ earthaccess/__init__.py | 3 ++ earthaccess/api.py | 14 +++-- earthaccess/auth.py | 73 ++++++++++++++++----------- earthaccess/search.py | 13 +++-- earthaccess/store.py | 8 +-- earthaccess/system.py | 22 ++++++++ tests/unit/test_auth.py | 20 +------- tests/unit/test_collection_queries.py | 2 +- tests/unit/test_store.py | 6 --- tests/unit/test_uat.py | 56 ++++++++++++++++++++ 13 files changed, 169 insertions(+), 69 deletions(-) create mode 100644 earthaccess/system.py create mode 100644 tests/unit/test_uat.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index abe23900..13643881 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,13 +12,13 @@ repos: - id: check-toml - id: check-json - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.6 + rev: v0.2.1 hooks: - id: ruff args: ["--fix", "--exit-non-zero-on-fix"] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.1.0" + rev: "v4.0.0-alpha.8" hooks: - id: prettier types_or: [yaml] diff --git a/CHANGELOG.md b/CHANGELOG.md index fe0efb32..3966cdaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,16 @@ # Changelog ## [Unreleased] +* Changes + * Removed the `get_user_profile` method and the `email_address` and `profile` attributes from the `Auth` class ([#421](https://github.com/nsidc/earthaccess/issues/421)) * Bug fixes: * fixed 483 by extracting a common CMR query method for collections and granules using SearchAfter header * Added VCR support for verifying the API call to CMR and the parsing of returned results without relying on CMR availability post development * Enhancements: * Corrected and enhanced static type hints for functions and methods that make - CMR queries or handle CMR query results (#508) + CMR queries or handle CMR query results ([#508](https://github.com/nsidc/earthaccess/issues/508)) + * Enable queries to Earthdata User Acceptance Testing (UAT) system for authenticated accounts ([#421](https://github.com/nsidc/earthaccess/issues/421)) ## [v0.9.0] 2024-02-28 diff --git a/docs/howto/authenticate.md b/docs/howto/authenticate.md index 68c16ff6..407e5140 100644 --- a/docs/howto/authenticate.md +++ b/docs/howto/authenticate.md @@ -60,3 +60,15 @@ Once you are authenticated with NASA EDL you can: * Regenerate CMR tokens (used for restricted datasets). +### Earthdata User Acceptance Testing (UAT) environment + +If your EDL account is authorized to access the User Acceptance Testing (UAT) system, +you can set earthaccess to work with its EDL and CMR endpoints +by setting the `system` argument at login, as follows: + +```python +import earthaccess + +earthaccess.login(system=earthaccess.UAT) + +``` diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index 4977bf38..05203308 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -20,6 +20,7 @@ from .kerchunk import consolidate_metadata from .search import DataCollections, DataGranules from .store import Store +from .system import PROD, UAT logger = logging.getLogger(__name__) @@ -41,6 +42,8 @@ "Store", "auth_environ", "consolidate_metadata", + "PROD", + "UAT", ] __version__ = version("earthaccess") diff --git a/earthaccess/api.py b/earthaccess/api.py index 0dd793cf..e2c6aa1e 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -9,6 +9,7 @@ from .results import DataCollection, DataGranule from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery from .store import Store +from .system import PROD, System from .utils import _validation as validate @@ -125,7 +126,7 @@ def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]: return query.get_all() -def login(strategy: str = "all", persist: bool = False) -> Auth: +def login(strategy: str = "all", persist: bool = False, system: System = PROD) -> Auth: """Authenticate with Earthdata login (https://urs.earthdata.nasa.gov/). Parameters: @@ -137,14 +138,21 @@ def login(strategy: str = "all", persist: bool = False) -> Auth: * **"netrc"**: retrieve username and password from ~/.netrc. * **"environment"**: retrieve username and password from `$EARTHDATA_USERNAME` and `$EARTHDATA_PASSWORD`. persist: will persist credentials in a .netrc file + system: the Earthdata system to access, defaults to PROD Returns: An instance of Auth. """ + # Set the underlying Auth object's earthdata system, + # before triggering the getattr function for `__auth__`. + earthaccess._auth._set_earthdata_system(system) + if strategy == "all": for strategy in ["environment", "netrc", "interactive"]: try: - earthaccess.__auth__.login(strategy=strategy, persist=persist) + earthaccess.__auth__.login( + strategy=strategy, persist=persist, system=system + ) except Exception: pass @@ -152,7 +160,7 @@ def login(strategy: str = "all", persist: bool = False) -> Auth: earthaccess.__store__ = Store(earthaccess.__auth__) break else: - earthaccess.__auth__.login(strategy=strategy, persist=persist) + earthaccess.__auth__.login(strategy=strategy, persist=persist, system=system) if earthaccess.__auth__.authenticated: earthaccess.__store__ = Store(earthaccess.__auth__) diff --git a/earthaccess/auth.py b/earthaccess/auth.py index caf19551..b8995f9d 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -13,6 +13,7 @@ from tinynetrc import Netrc from .daac import DAACS +from .system import PROD, System try: user_agent = f"earthaccess v{importlib.metadata.version('earthaccess')}" @@ -37,7 +38,9 @@ class SessionWithHeaderRedirection(requests.Session): ] def __init__( - self, username: Optional[str] = None, password: Optional[str] = None + self, + username: Optional[str] = None, + password: Optional[str] = None, ) -> None: super().__init__() self.headers.update({"User-Agent": user_agent}) @@ -72,12 +75,14 @@ def __init__(self) -> None: # Maybe all these predefined URLs should be in a constants.py file self.authenticated = False self.tokens: List = [] - self.EDL_GET_TOKENS_URL = "https://urs.earthdata.nasa.gov/api/users/tokens" - self.EDL_GET_PROFILE = "https://urs.earthdata.nasa.gov/api/users/?client_id=ntD0YGC_SM3Bjs-Tnxd7bg" - self.EDL_GENERATE_TOKENS_URL = "https://urs.earthdata.nasa.gov/api/users/token" - self.EDL_REVOKE_TOKEN = "https://urs.earthdata.nasa.gov/api/users/revoke_token" + self._set_earthdata_system(PROD) - def login(self, strategy: str = "netrc", persist: bool = False) -> Any: + def login( + self, + strategy: str = "netrc", + persist: bool = False, + system: Optional[System] = None, + ) -> Any: """Authenticate with Earthdata login. Parameters: @@ -89,11 +94,15 @@ def login(self, strategy: str = "netrc", persist: bool = False) -> Any: * **"environment"**: Retrieve a username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD. persist: Will persist credentials in a `.netrc` file. + system (Env): the EDL endpoint to log in to Earthdata, defaults to PROD Returns: An instance of Auth. """ - if self.authenticated: + if system is not None: + self._set_earthdata_system(system) + + if self.authenticated and (system == self.system): logger.debug("We are already authenticated with NASA EDL") return self if strategy == "interactive": @@ -102,8 +111,26 @@ def login(self, strategy: str = "netrc", persist: bool = False) -> Any: self._netrc() if strategy == "environment": self._environment() + return self + def _set_earthdata_system(self, system: System) -> None: + self.system = system + + # Maybe all these predefined URLs should be in a constants.py file + self.EDL_GET_TOKENS_URL = f"https://{self.system.edl_hostname}/api/users/tokens" + self.EDL_GENERATE_TOKENS_URL = ( + f"https://{self.system.edl_hostname}/api/users/token" + ) + self.EDL_REVOKE_TOKEN = ( + f"https://{self.system.edl_hostname}/api/users/revoke_token" + ) + + self._eula_url = ( + f"https://{self.system.edl_hostname}/users/earthaccess/unaccepted_eulas" + ) + self._apps_url = f"https://{self.system.edl_hostname}/application_search" + def refresh_tokens(self) -> bool: """Refresh CMR tokens. Tokens are used to do authenticated queries on CMR for restricted and early access datasets. @@ -198,10 +225,8 @@ def get_s3_credentials( print( f"Authentication with Earthdata Login failed with:\n{auth_resp.text[0:1000]}" ) - eula_url = "https://urs.earthdata.nasa.gov/users/earthaccess/unaccepted_eulas" - apps_url = "https://urs.earthdata.nasa.gov/application_search" print( - f"Consider accepting the EULAs available at {eula_url} and applications at {apps_url}" + f"Consider accepting the EULAs available at {self._eula_url} and applications at {self._apps_url}" ) return {} @@ -234,15 +259,6 @@ class Session instance with Auth and bearer token headers ) return session - def get_user_profile(self) -> Dict[str, Any]: - if hasattr(self, "username") and self.authenticated: - session = self.get_session() - url = self.EDL_GET_PROFILE.replace("", self.username) - user_profile = session.get(url).json() - return user_profile - else: - return {} - def _interactive(self, persist_credentials: bool = False) -> bool: username = input("Enter your Earthdata Login username: ") password = getpass.getpass(prompt="Enter your Earthdata password: ") @@ -261,11 +277,11 @@ def _netrc(self) -> bool: raise FileNotFoundError(f"No .netrc found in {Path.home()}") from err except NetrcParseError as err: raise NetrcParseError("Unable to parse .netrc") from err - if my_netrc["urs.earthdata.nasa.gov"] is not None: - username = my_netrc["urs.earthdata.nasa.gov"]["login"] - password = my_netrc["urs.earthdata.nasa.gov"]["password"] - else: + if (creds := my_netrc[self.system.edl_hostname]) is None: return False + + username = creds["login"] + password = creds["password"] authenticated = self._get_credentials(username, password) if authenticated: logger.debug("Using .netrc file for EDL") @@ -313,12 +329,6 @@ def _get_credentials( logger.debug( f"Using token with expiration date: {self.token['expiration_date']}" ) - profile = self.get_user_profile() - if "email_address" in profile: - self.user_profile = profile - self.email = profile["email_address"] - else: - self.email = "" return self.authenticated @@ -369,7 +379,10 @@ def _persist_user_credentials(self, username: str, password: str) -> bool: print(e) return False my_netrc = Netrc(str(netrc_path)) - my_netrc["urs.earthdata.nasa.gov"] = {"login": username, "password": password} + my_netrc[self.system.edl_hostname] = { + "login": username, + "password": password, + } my_netrc.save() urs_cookies_path = Path.home() / ".urs_cookies" if not urs_cookies_path.exists(): diff --git a/earthaccess/search.py b/earthaccess/search.py index bc233a4c..9f73af14 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -98,10 +98,13 @@ def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> No self.session = ( # To search, we need the new bearer tokens from NASA Earthdata auth.get_session(bearer_token=True) - if auth is not None and auth.authenticated + if auth and auth.authenticated else requests.session() ) + if auth: + self.mode(auth.system.cmr_base_url) + self._debug = False self.params["has_granules"] = True @@ -449,16 +452,18 @@ class DataGranules(GranuleQuery): _format = "umm_json" def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> None: - """Base class for Granule and Collection CMR queries.""" super().__init__(*args, **kwargs) self.session = ( # To search, we need the new bearer tokens from NASA Earthdata auth.get_session(bearer_token=True) - if auth is not None and auth.authenticated + if auth and auth.authenticated else requests.session() ) + if auth: + self.mode(auth.system.cmr_base_url) + self._debug = False @override @@ -769,7 +774,7 @@ def _valid_state(self) -> bool: return True def _is_cloud_hosted(self, granule: Any) -> bool: - """Check if a granule record in CMR advertises "direct access".""" + """Check if a granule record, from CMR, advertises "direct access".""" if "RelatedUrls" not in granule["umm"]: return False diff --git a/earthaccess/store.py b/earthaccess/store.py index ef4fa01a..67ff0c65 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -100,7 +100,7 @@ def __init__(self, auth: Any, pre_authorize: bool = False) -> None: self._s3_credentials: Dict[ Tuple, Tuple[datetime.datetime, Dict[str, str]] ] = {} - oauth_profile = "https://urs.earthdata.nasa.gov/profile" + oauth_profile = f"https://{auth.system.edl_hostname}/profile" # sets the initial URS cookie self._requests_cookies: Dict[str, Any] = {} self.set_requests_session(oauth_profile) @@ -188,9 +188,9 @@ def set_requests_session( resp.raise_for_status() else: self._requests_cookies.update(new_session.cookies.get_dict()) - elif resp.status_code >= 200 and resp.status_code <= 300: + elif 200 <= resp.status_code < 300: self._requests_cookies = self._http_session.cookies.get_dict() - elif resp.status_code >= 500: + else: resp.raise_for_status() def get_s3fs_session( @@ -458,6 +458,7 @@ def get( Parameters: granules: A list of granules(DataGranule) instances or a list of granule links (HTTP). local_path: Local directory to store the remote data granules. + provider: a valid cloud provider, each DAAC has a provider code for their cloud distributions threads: Parallel number of threads to use to download the files; adjust as necessary, default = 8. @@ -497,6 +498,7 @@ def _get( Parameters: granules: A list of granules (DataGranule) instances or a list of granule links (HTTP). local_path: Local directory to store the remote data granules + provider: a valid cloud provider, each DAAC has a provider code for their cloud distributions threads: Parallel number of threads to use to download the files; adjust as necessary, default = 8. diff --git a/earthaccess/system.py b/earthaccess/system.py new file mode 100644 index 00000000..fbd2fbb6 --- /dev/null +++ b/earthaccess/system.py @@ -0,0 +1,22 @@ +"""Earthdata Environments/Systems module.""" + +from dataclasses import dataclass + +from typing_extensions import NewType + +from cmr import CMR_OPS, CMR_UAT + +CMRBaseURL = NewType("CMRBaseURL", str) +EDLHostname = NewType("EDLHostname", str) + + +@dataclass(frozen=True) +class System: + """Host URL options, for different Earthdata domains.""" + + cmr_base_url: CMRBaseURL + edl_hostname: EDLHostname + + +PROD = System(CMRBaseURL(CMR_OPS), EDLHostname("urs.earthdata.nasa.gov")) +UAT = System(CMRBaseURL(CMR_UAT), EDLHostname("uat.urs.earthdata.nasa.gov")) diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index b2b0a048..dbed96b1 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -30,12 +30,6 @@ def test_auth_gets_proper_credentials(self, user_input, user_password): json={"email_address": "test@test.edu"}, status=200, ) - responses.add( - responses.GET, - "https://urs.earthdata.nasa.gov/api/users/user?client_id=ntD0YGC_SM3Bjs-Tnxd7bg", - json={}, - status=200, - ) # Test auth = Auth() @@ -46,7 +40,7 @@ def test_auth_gets_proper_credentials(self, user_input, user_password): self.assertEqual(auth.authenticated, True) self.assertTrue(auth.token in json_response) - # test that we are creaintg a session with the proper headers + # test that we are creating a session with the proper headers self.assertTrue("User-Agent" in headers) self.assertTrue("earthaccess" in headers["User-Agent"]) @@ -70,12 +64,6 @@ def test_auth_can_create_proper_credentials(self, user_input, user_password): json={}, status=200, ) - responses.add( - responses.GET, - "https://urs.earthdata.nasa.gov/api/users/user?client_id=ntD0YGC_SM3Bjs-Tnxd7bg", - json={}, - status=200, - ) responses.add( responses.POST, "https://urs.earthdata.nasa.gov/api/users/token", @@ -109,12 +97,6 @@ def test_auth_fails_for_wrong_credentials(self, user_input, user_password): json=json_response, status=401, ) - responses.add( - responses.GET, - "https://urs.earthdata.nasa.gov/api/users/user?client_id=ntD0YGC_SM3Bjs-Tnxd7bg", - json=json_response, - status=401, - ) responses.add( responses.POST, "https://urs.earthdata.nasa.gov/api/users/token", diff --git a/tests/unit/test_collection_queries.py b/tests/unit/test_collection_queries.py index f68db9bc..603784de 100644 --- a/tests/unit/test_collection_queries.py +++ b/tests/unit/test_collection_queries.py @@ -37,7 +37,7 @@ def test_query_can_find_cloud_provider(): assert query.params["provider"] == "POCLOUD" query = DataCollections().cloud_hosted(True).daac("PODAAC") assert query.params["provider"] == "POCLOUD" - # SEDAC does not have a cloud provider so it should default to the on prem provider + # SEDAC does not have a cloud provider, so it should default to the on prem provider query = DataCollections().cloud_hosted(True).daac("SEDAC") assert query.params["provider"] == "SEDAC" query = DataCollections().daac("ASDC").cloud_hosted(True) diff --git a/tests/unit/test_store.py b/tests/unit/test_store.py index 221ece1a..4da4e0e3 100644 --- a/tests/unit/test_store.py +++ b/tests/unit/test_store.py @@ -24,12 +24,6 @@ def setUp(self): json=json_response, status=200, ) - responses.add( - responses.GET, - "https://urs.earthdata.nasa.gov/api/users/user?client_id=ntD0YGC_SM3Bjs-Tnxd7bg", - json={}, - status=200, - ) self.auth = Auth() self.auth.login(strategy="environment") self.assertEqual(self.auth.authenticated, True) diff --git a/tests/unit/test_uat.py b/tests/unit/test_uat.py new file mode 100644 index 00000000..619db3e9 --- /dev/null +++ b/tests/unit/test_uat.py @@ -0,0 +1,56 @@ +# package imports +import json +from pathlib import Path +from unittest import mock + +import earthaccess +import responses + + +# TODO: Still need to create an integration test, with corresponding credentials for UAT. +class TestUatEnvironmentArgument: + @responses.activate # This will cause the test to check that all mocked URLs are hit. + @mock.patch("getpass.getpass", new=mock.Mock(return_value="password")) + @mock.patch( + "builtins.input", + new=mock.Mock(return_value="user"), + ) + def test_uat_login_when_uat_selected(self): + """Test the correct env is queried based on what's selected at login-time.""" + json_response = [ + {"access_token": "EDL-token-1", "expiration_date": "12/15/2021"}, + {"access_token": "EDL-token-2", "expiration_date": "12/16/2021"}, + ] + responses.add( + responses.GET, + "https://uat.urs.earthdata.nasa.gov/api/users/tokens", + json=json_response, + status=200, + ) + responses.add( + responses.GET, + "https://uat.urs.earthdata.nasa.gov/profile", + json={"email_address": "test@test.edu"}, + status=200, + ) + + with open(Path(__file__).parent / "fixtures" / "atl03_umm.json", "r") as f: + cmr_json_response = json.loads(f.read()) + + responses.add( + responses.GET, + "https://cmr.uat.earthdata.nasa.gov/search/granules.umm_json?page_size=0", + json=cmr_json_response, + headers={"CMR-Hits": "0"}, + status=200, + ) + + # Login + auth = earthaccess.login(strategy="interactive", system=earthaccess.UAT) + assert auth.authenticated + assert auth.system.edl_hostname == earthaccess.UAT.edl_hostname + + # Query CMR, and check that mock communication was with UAT CMR + results = earthaccess.search_data() + assert len(results) == 1 + assert earthaccess.__auth__.system.edl_hostname == earthaccess.UAT.edl_hostname