Skip to content

Commit

Permalink
Deprecated get_s3fs_session (nsidc#768)
Browse files Browse the repository at this point in the history
Co-authored-by: Chuck Daniels <[email protected]>
  • Loading branch information
Sherwin-14 and chuckwondo authored Jul 27, 2024
1 parent 3a8f6ac commit 0945db5
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 16 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

### Unreleased

- Deprecate `earthaccess.get_s3fs_session` and `Store.get_s3fs_session`. Use
`earthaccess.get_s3_filesystem` and `Store.get_s3_filesystem`, respectively,
instead ([#766](https://github.com/nsidc/earthaccess/issues/766))([**@Sherwin-14**](https://github.com/Sherwin-14))

## [v0.10.0] 2024-07-19

### Changed
Expand All @@ -16,6 +22,7 @@
([**@danielfromearth**](https://github.com/danielfromearth),[**@chuckwondo**](https://github.com/chuckwondo),
[**@jhkennedy**](https://github.com/jhkennedy),[**@mfisher87**](https://github.com/mfisher87))


### Added

- Enable queries to Earthdata User Acceptance Testing (UAT) system for authenticated accounts
Expand Down
2 changes: 2 additions & 0 deletions earthaccess/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
get_fsspec_https_session,
get_requests_https_session,
get_s3_credentials,
get_s3_filesystem,
get_s3fs_session,
granule_query,
login,
Expand All @@ -34,6 +35,7 @@
"get_fsspec_https_session",
"get_s3fs_session",
"get_s3_credentials",
"get_s3_filesystem",
"get_edl_token",
"granule_query",
"collection_query",
Expand Down
27 changes: 24 additions & 3 deletions earthaccess/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
import s3fs
from fsspec import AbstractFileSystem
from typing_extensions import Any, Dict, List, Optional, Union
from typing_extensions import Any, Dict, List, Optional, Union, deprecated

import earthaccess

Expand Down Expand Up @@ -325,13 +325,34 @@ def get_requests_https_session() -> requests.Session:
return session


@deprecated("Use get_s3_filesystem instead")
def get_s3fs_session(
daac: Optional[str] = None,
provider: Optional[str] = None,
results: Optional[DataGranule] = None,
) -> s3fs.S3FileSystem:
"""Returns a fsspec s3fs file session for direct access when we are in us-west-2.
Parameters:
daac: Any DAAC short name e.g. NSIDC, GES_DISC
provider: Each DAAC can have a cloud provider.
If the DAAC is specified, there is no need to use provider.
results: A list of results from search_data().
`earthaccess` will use the metadata from CMR to obtain the S3 Endpoint.
Returns:
An `s3fs.S3FileSystem` authenticated for reading in-region in us-west-2 for 1 hour.
"""
return get_s3_filesystem(daac, provider, results)


def get_s3_filesystem(
daac: Optional[str] = None,
provider: Optional[str] = None,
results: Optional[DataGranule] = None,
) -> s3fs.S3FileSystem:
"""Return an `s3fs.S3FileSystem` for direct access when running within the AWS us-west-2 region.
Parameters:
daac: Any DAAC short name e.g. NSIDC, GES_DISC
provider: Each DAAC can have a cloud provider.
Expand All @@ -347,9 +368,9 @@ def get_s3fs_session(
if results is not None:
endpoint = results[0].get_s3_credentials_endpoint()
if endpoint is not None:
session = earthaccess.__store__.get_s3fs_session(endpoint=endpoint)
session = earthaccess.__store__.get_s3_filesystem(endpoint=endpoint)
return session
session = earthaccess.__store__.get_s3fs_session(daac=daac, provider=provider)
session = earthaccess.__store__.get_s3_filesystem(daac=daac, provider=provider)
return session


Expand Down
2 changes: 1 addition & 1 deletion earthaccess/kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def consolidate_metadata(
) from e

if access == "direct":
fs = earthaccess.get_s3fs_session(provider=granules[0]["meta"]["provider-id"])
fs = earthaccess.get_s3_filesystem(provider=granules[0]["meta"]["provider-id"])
else:
fs = earthaccess.get_fsspec_https_session()

Expand Down
33 changes: 27 additions & 6 deletions earthaccess/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import s3fs
from multimethod import multimethod as singledispatchmethod
from pqdm.threads import pqdm
from typing_extensions import deprecated

import earthaccess

Expand Down Expand Up @@ -197,6 +198,7 @@ def set_requests_session(
else:
resp.raise_for_status()

@deprecated("Use get_s3_filesystem instead")
def get_s3fs_session(
self,
daac: Optional[str] = None,
Expand All @@ -206,6 +208,25 @@ def get_s3fs_session(
) -> s3fs.S3FileSystem:
"""Returns a s3fs instance for a given cloud provider / DAAC.
Parameters:
daac: any of the DAACs, e.g. NSIDC, PODAAC
provider: a data provider if we know them, e.g. PODAAC -> POCLOUD
endpoint: pass the URL for the credentials directly
Returns:
An `s3fs.S3FileSystem` authenticated for reading in-region in us-west-2 for 1 hour.
"""
return self.get_s3_filesystem(daac, concept_id, provider, endpoint)

def get_s3_filesystem(
self,
daac: Optional[str] = None,
concept_id: Optional[str] = None,
provider: Optional[str] = None,
endpoint: Optional[str] = None,
) -> s3fs.S3FileSystem:
"""Return an `s3fs.S3FileSystem` instance for a given cloud provider / DAAC.
Parameters:
daac: any of the DAACs, e.g. NSIDC, PODAAC
provider: a data provider if we know them, e.g. PODAAC -> POCLOUD
Expand Down Expand Up @@ -360,10 +381,10 @@ def _open_granules(
endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"])
if endpoint is not None:
logger.info(f"using endpoint: {endpoint}")
s3_fs = self.get_s3fs_session(endpoint=endpoint)
s3_fs = self.get_s3_filesystem(endpoint=endpoint)
else:
logger.info(f"using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
s3_fs = self.get_s3_filesystem(provider=provider)
else:
access = "on_prem"
s3_fs = None
Expand Down Expand Up @@ -416,7 +437,7 @@ def _open_urls(
url_mapping: Mapping[str, None] = {url: None for url in granules}
if self.in_region and granules[0].startswith("s3"):
if provider is not None:
s3_fs = self.get_s3fs_session(provider=provider)
s3_fs = self.get_s3_filesystem(provider=provider)
if s3_fs is not None:
try:
fileset = _open_files(
Expand Down Expand Up @@ -530,7 +551,7 @@ def _get_urls(
)
if self.in_region and data_links[0].startswith("s3"):
logger.info(f"Accessing cloud dataset using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
s3_fs = self.get_s3_filesystem(provider=provider)
# TODO: make this parallel or concurrent
for file in data_links:
s3_fs.get(file, str(local_path))
Expand Down Expand Up @@ -573,10 +594,10 @@ def _get_granules(
logger.info(
f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}"
)
s3_fs = self.get_s3fs_session(endpoint=endpoint)
s3_fs = self.get_s3_filesystem(endpoint=endpoint)
else:
logger.info(f"Accessing cloud dataset using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
s3_fs = self.get_s3_filesystem(provider=provider)

local_path.mkdir(parents=True, exist_ok=True)

Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ def test_get_s3_credentials_lowercase_location(location):


@pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"}))
def test_get_s3fs_session_lowercase_location(location):
def test_get_s3_filesystem_lowercase_location(location):
activate_environment()
earthaccess.login(strategy="environment")
fs = earthaccess.get_s3fs_session(**location)
fs = earthaccess.get_s3_filesystem(**location)
assert isinstance(fs, s3fs.S3FileSystem)
assert all(fs.storage_options[key] for key in ["key", "secret", "token"])
63 changes: 63 additions & 0 deletions tests/unit/test_deprecations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import contextlib
import warnings
from unittest import mock

import earthaccess
import pytest
import responses
from earthaccess.api import get_s3fs_session
from earthaccess.store import Store


@pytest.fixture(scope="session")
@responses.activate
@mock.patch("getpass.getpass")
@mock.patch("builtins.input")
def auth(user_input, user_password):
user_input.return_value = "user"
user_password.return_value = "password"
json_response = [
{"access_token": "EDL-token-1", "expiration_date": "12/15/2021"},
{"access_token": "EDL-token-2", "expiration_date": "12/16/2021"},
]
responses.add(
responses.GET,
"https://urs.earthdata.nasa.gov/api/users/tokens",
json=json_response,
status=200,
)
responses.add(
responses.GET,
"https://urs.earthdata.nasa.gov/profile",
json={"email_address": "[email protected]"},
status=200,
)

earthaccess.login(strategy="interactive")

return earthaccess.__auth__


def test_deprecation_warning_for_api():
with warnings.catch_warnings(record=True) as w:
# Cause all warnings to always be triggered.
warnings.simplefilter("always")
# Trigger a warning.
with contextlib.suppress(AttributeError):
get_s3fs_session()
# Verify some things
assert issubclass(w[0].category, DeprecationWarning)
assert "Use get_s3_filesystem instead" in str(w[0].message)


def test_deprecation_warning_for_store(auth):
store = Store(auth)
with warnings.catch_warnings(record=True) as w:
# Cause all warnings to always be triggered.
warnings.simplefilter("always")
# Trigger a warning.
with contextlib.suppress(ValueError):
store.get_s3fs_session()
# Verify some things
assert issubclass(w[0].category, DeprecationWarning)
assert "Use get_s3_filesystem instead" in str(w[0].message)
8 changes: 4 additions & 4 deletions tests/unit/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ def test_store_can_create_s3_fsspec_session(self):
"OBDAAC",
"ASDC",
]:
s3_fs = store.get_s3fs_session(daac=daac)
s3_fs = store.get_s3_filesystem(daac=daac)
assert isinstance(s3_fs, s3fs.S3FileSystem)
assert s3_fs.storage_options == expected_storage_options

for endpoint in custom_endpoints:
s3_fs = store.get_s3fs_session(endpoint=endpoint)
s3_fs = store.get_s3_filesystem(endpoint=endpoint)
assert isinstance(s3_fs, s3fs.S3FileSystem)
assert s3_fs.storage_options == expected_storage_options

Expand All @@ -120,12 +120,12 @@ def test_store_can_create_s3_fsspec_session(self):
"OB_CLOUD",
"LARC_CLOUD",
]:
s3_fs = store.get_s3fs_session(provider=provider)
s3_fs = store.get_s3_filesystem(provider=provider)
assert isinstance(s3_fs, s3fs.S3FileSystem)
assert s3_fs.storage_options == expected_storage_options

# Ensure informative error is raised
with pytest.raises(ValueError, match="parameters must be specified"):
store.get_s3fs_session()
store.get_s3_filesystem()

return None

0 comments on commit 0945db5

Please sign in to comment.