diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e49ac4a..6e3094d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +### Unreleased + +- Deprecate `earthaccess.get_s3fs_session` and `Store.get_s3fs_session`. Use + `earthaccess.get_s3_filesystem` and `Store.get_s3_filesystem`, respectively, + instead ([#766](https://github.com/nsidc/earthaccess/issues/766))([**@Sherwin-14**](https://github.com/Sherwin-14)) + ## [v0.10.0] 2024-07-19 ### Changed @@ -16,6 +22,7 @@ ([**@danielfromearth**](https://github.com/danielfromearth),[**@chuckwondo**](https://github.com/chuckwondo), [**@jhkennedy**](https://github.com/jhkennedy),[**@mfisher87**](https://github.com/mfisher87)) + ### Added - Enable queries to Earthdata User Acceptance Testing (UAT) system for authenticated accounts diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index 8d399945..9839a9e8 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -10,6 +10,7 @@ get_fsspec_https_session, get_requests_https_session, get_s3_credentials, + get_s3_filesystem, get_s3fs_session, granule_query, login, @@ -34,6 +35,7 @@ "get_fsspec_https_session", "get_s3fs_session", "get_s3_credentials", + "get_s3_filesystem", "get_edl_token", "granule_query", "collection_query", diff --git a/earthaccess/api.py b/earthaccess/api.py index 0418d69f..7871c2de 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -3,7 +3,7 @@ import requests import s3fs from fsspec import AbstractFileSystem -from typing_extensions import Any, Dict, List, Optional, Union +from typing_extensions import Any, Dict, List, Optional, Union, deprecated import earthaccess @@ -325,6 +325,7 @@ def get_requests_https_session() -> requests.Session: return session +@deprecated("Use get_s3_filesystem instead") def get_s3fs_session( daac: Optional[str] = None, provider: Optional[str] = None, @@ -332,6 +333,26 @@ def get_s3fs_session( ) -> s3fs.S3FileSystem: """Returns a fsspec s3fs file session for direct access when we are in us-west-2. + Parameters: + daac: Any DAAC short name e.g. NSIDC, GES_DISC + provider: Each DAAC can have a cloud provider. + If the DAAC is specified, there is no need to use provider. + results: A list of results from search_data(). + `earthaccess` will use the metadata from CMR to obtain the S3 Endpoint. + + Returns: + An `s3fs.S3FileSystem` authenticated for reading in-region in us-west-2 for 1 hour. + """ + return get_s3_filesystem(daac, provider, results) + + +def get_s3_filesystem( + daac: Optional[str] = None, + provider: Optional[str] = None, + results: Optional[DataGranule] = None, +) -> s3fs.S3FileSystem: + """Return an `s3fs.S3FileSystem` for direct access when running within the AWS us-west-2 region. + Parameters: daac: Any DAAC short name e.g. NSIDC, GES_DISC provider: Each DAAC can have a cloud provider. @@ -347,9 +368,9 @@ def get_s3fs_session( if results is not None: endpoint = results[0].get_s3_credentials_endpoint() if endpoint is not None: - session = earthaccess.__store__.get_s3fs_session(endpoint=endpoint) + session = earthaccess.__store__.get_s3_filesystem(endpoint=endpoint) return session - session = earthaccess.__store__.get_s3fs_session(daac=daac, provider=provider) + session = earthaccess.__store__.get_s3_filesystem(daac=daac, provider=provider) return session diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py index 02533a0e..26758184 100644 --- a/earthaccess/kerchunk.py +++ b/earthaccess/kerchunk.py @@ -39,7 +39,7 @@ def consolidate_metadata( ) from e if access == "direct": - fs = earthaccess.get_s3fs_session(provider=granules[0]["meta"]["provider-id"]) + fs = earthaccess.get_s3_filesystem(provider=granules[0]["meta"]["provider-id"]) else: fs = earthaccess.get_fsspec_https_session() diff --git a/earthaccess/store.py b/earthaccess/store.py index f3b1054c..a97f8188 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -14,6 +14,7 @@ import s3fs from multimethod import multimethod as singledispatchmethod from pqdm.threads import pqdm +from typing_extensions import deprecated import earthaccess @@ -197,6 +198,7 @@ def set_requests_session( else: resp.raise_for_status() + @deprecated("Use get_s3_filesystem instead") def get_s3fs_session( self, daac: Optional[str] = None, @@ -206,6 +208,25 @@ def get_s3fs_session( ) -> s3fs.S3FileSystem: """Returns a s3fs instance for a given cloud provider / DAAC. + Parameters: + daac: any of the DAACs, e.g. NSIDC, PODAAC + provider: a data provider if we know them, e.g. PODAAC -> POCLOUD + endpoint: pass the URL for the credentials directly + + Returns: + An `s3fs.S3FileSystem` authenticated for reading in-region in us-west-2 for 1 hour. + """ + return self.get_s3_filesystem(daac, concept_id, provider, endpoint) + + def get_s3_filesystem( + self, + daac: Optional[str] = None, + concept_id: Optional[str] = None, + provider: Optional[str] = None, + endpoint: Optional[str] = None, + ) -> s3fs.S3FileSystem: + """Return an `s3fs.S3FileSystem` instance for a given cloud provider / DAAC. + Parameters: daac: any of the DAACs, e.g. NSIDC, PODAAC provider: a data provider if we know them, e.g. PODAAC -> POCLOUD @@ -360,10 +381,10 @@ def _open_granules( endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"]) if endpoint is not None: logger.info(f"using endpoint: {endpoint}") - s3_fs = self.get_s3fs_session(endpoint=endpoint) + s3_fs = self.get_s3_filesystem(endpoint=endpoint) else: logger.info(f"using provider: {provider}") - s3_fs = self.get_s3fs_session(provider=provider) + s3_fs = self.get_s3_filesystem(provider=provider) else: access = "on_prem" s3_fs = None @@ -416,7 +437,7 @@ def _open_urls( url_mapping: Mapping[str, None] = {url: None for url in granules} if self.in_region and granules[0].startswith("s3"): if provider is not None: - s3_fs = self.get_s3fs_session(provider=provider) + s3_fs = self.get_s3_filesystem(provider=provider) if s3_fs is not None: try: fileset = _open_files( @@ -530,7 +551,7 @@ def _get_urls( ) if self.in_region and data_links[0].startswith("s3"): logger.info(f"Accessing cloud dataset using provider: {provider}") - s3_fs = self.get_s3fs_session(provider=provider) + s3_fs = self.get_s3_filesystem(provider=provider) # TODO: make this parallel or concurrent for file in data_links: s3_fs.get(file, str(local_path)) @@ -573,10 +594,10 @@ def _get_granules( logger.info( f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}" ) - s3_fs = self.get_s3fs_session(endpoint=endpoint) + s3_fs = self.get_s3_filesystem(endpoint=endpoint) else: logger.info(f"Accessing cloud dataset using provider: {provider}") - s3_fs = self.get_s3fs_session(provider=provider) + s3_fs = self.get_s3_filesystem(provider=provider) local_path.mkdir(parents=True, exist_ok=True) diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index ebd52ab7..7c0c1b37 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -107,9 +107,9 @@ def test_get_s3_credentials_lowercase_location(location): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) -def test_get_s3fs_session_lowercase_location(location): +def test_get_s3_filesystem_lowercase_location(location): activate_environment() earthaccess.login(strategy="environment") - fs = earthaccess.get_s3fs_session(**location) + fs = earthaccess.get_s3_filesystem(**location) assert isinstance(fs, s3fs.S3FileSystem) assert all(fs.storage_options[key] for key in ["key", "secret", "token"]) diff --git a/tests/unit/test_deprecations.py b/tests/unit/test_deprecations.py new file mode 100644 index 00000000..cbc93a09 --- /dev/null +++ b/tests/unit/test_deprecations.py @@ -0,0 +1,63 @@ +import contextlib +import warnings +from unittest import mock + +import earthaccess +import pytest +import responses +from earthaccess.api import get_s3fs_session +from earthaccess.store import Store + + +@pytest.fixture(scope="session") +@responses.activate +@mock.patch("getpass.getpass") +@mock.patch("builtins.input") +def auth(user_input, user_password): + user_input.return_value = "user" + user_password.return_value = "password" + json_response = [ + {"access_token": "EDL-token-1", "expiration_date": "12/15/2021"}, + {"access_token": "EDL-token-2", "expiration_date": "12/16/2021"}, + ] + responses.add( + responses.GET, + "https://urs.earthdata.nasa.gov/api/users/tokens", + json=json_response, + status=200, + ) + responses.add( + responses.GET, + "https://urs.earthdata.nasa.gov/profile", + json={"email_address": "test@test.edu"}, + status=200, + ) + + earthaccess.login(strategy="interactive") + + return earthaccess.__auth__ + + +def test_deprecation_warning_for_api(): + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + # Trigger a warning. + with contextlib.suppress(AttributeError): + get_s3fs_session() + # Verify some things + assert issubclass(w[0].category, DeprecationWarning) + assert "Use get_s3_filesystem instead" in str(w[0].message) + + +def test_deprecation_warning_for_store(auth): + store = Store(auth) + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + # Trigger a warning. + with contextlib.suppress(ValueError): + store.get_s3fs_session() + # Verify some things + assert issubclass(w[0].category, DeprecationWarning) + assert "Use get_s3_filesystem instead" in str(w[0].message) diff --git a/tests/unit/test_store.py b/tests/unit/test_store.py index 4da4e0e3..4e80bd4b 100644 --- a/tests/unit/test_store.py +++ b/tests/unit/test_store.py @@ -101,12 +101,12 @@ def test_store_can_create_s3_fsspec_session(self): "OBDAAC", "ASDC", ]: - s3_fs = store.get_s3fs_session(daac=daac) + s3_fs = store.get_s3_filesystem(daac=daac) assert isinstance(s3_fs, s3fs.S3FileSystem) assert s3_fs.storage_options == expected_storage_options for endpoint in custom_endpoints: - s3_fs = store.get_s3fs_session(endpoint=endpoint) + s3_fs = store.get_s3_filesystem(endpoint=endpoint) assert isinstance(s3_fs, s3fs.S3FileSystem) assert s3_fs.storage_options == expected_storage_options @@ -120,12 +120,12 @@ def test_store_can_create_s3_fsspec_session(self): "OB_CLOUD", "LARC_CLOUD", ]: - s3_fs = store.get_s3fs_session(provider=provider) + s3_fs = store.get_s3_filesystem(provider=provider) assert isinstance(s3_fs, s3fs.S3FileSystem) assert s3_fs.storage_options == expected_storage_options # Ensure informative error is raised with pytest.raises(ValueError, match="parameters must be specified"): - store.get_s3fs_session() + store.get_s3_filesystem() return None