Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecated get_s3fs_session #768

Merged
merged 7 commits into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

### Unreleased

- Deprecate `earthaccess.get_s3fs_session` and `Store.get_s3fs_session`. Use
`earthaccess.get_s3_filesystem` and `Store.get_s3_filesystem`, respectively,
instead ([#766](https://github.com/nsidc/earthaccess/issues/766))([**@Sherwin-14**](https://github.com/Sherwin-14))

## [v0.10.0] 2024-07-19

### Changed
Expand All @@ -16,6 +22,7 @@
([**@danielfromearth**](https://github.com/danielfromearth),[**@chuckwondo**](https://github.com/chuckwondo),
[**@jhkennedy**](https://github.com/jhkennedy),[**@mfisher87**](https://github.com/mfisher87))


### Added

- Enable queries to Earthdata User Acceptance Testing (UAT) system for authenticated accounts
Expand Down
2 changes: 2 additions & 0 deletions earthaccess/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
get_fsspec_https_session,
get_requests_https_session,
get_s3_credentials,
get_s3_filesystem,
get_s3fs_session,
granule_query,
login,
Expand All @@ -34,6 +35,7 @@
"get_fsspec_https_session",
"get_s3fs_session",
"get_s3_credentials",
"get_s3_filesystem",
"get_edl_token",
"granule_query",
"collection_query",
Expand Down
27 changes: 24 additions & 3 deletions earthaccess/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
import s3fs
from fsspec import AbstractFileSystem
from typing_extensions import Any, Dict, List, Optional, Union
from typing_extensions import Any, Dict, List, Optional, Union, deprecated

import earthaccess

Expand Down Expand Up @@ -325,13 +325,34 @@ def get_requests_https_session() -> requests.Session:
return session


@deprecated("Use get_s3_filesystem instead")
def get_s3fs_session(
daac: Optional[str] = None,
provider: Optional[str] = None,
results: Optional[DataGranule] = None,
) -> s3fs.S3FileSystem:
"""Returns a fsspec s3fs file session for direct access when we are in us-west-2.

Parameters:
daac: Any DAAC short name e.g. NSIDC, GES_DISC
provider: Each DAAC can have a cloud provider.
If the DAAC is specified, there is no need to use provider.
results: A list of results from search_data().
`earthaccess` will use the metadata from CMR to obtain the S3 Endpoint.

Returns:
An `s3fs.S3FileSystem` authenticated for reading in-region in us-west-2 for 1 hour.
"""
return get_s3_filesystem(daac, provider, results)


def get_s3_filesystem(
daac: Optional[str] = None,
provider: Optional[str] = None,
results: Optional[DataGranule] = None,
) -> s3fs.S3FileSystem:
"""Return an `s3fs.S3FileSystem` for direct access when running within the AWS us-west-2 region.

Parameters:
daac: Any DAAC short name e.g. NSIDC, GES_DISC
provider: Each DAAC can have a cloud provider.
Expand All @@ -347,9 +368,9 @@ def get_s3fs_session(
if results is not None:
endpoint = results[0].get_s3_credentials_endpoint()
if endpoint is not None:
session = earthaccess.__store__.get_s3fs_session(endpoint=endpoint)
session = earthaccess.__store__.get_s3_filesystem(endpoint=endpoint)
return session
session = earthaccess.__store__.get_s3fs_session(daac=daac, provider=provider)
session = earthaccess.__store__.get_s3_filesystem(daac=daac, provider=provider)
return session


Expand Down
2 changes: 1 addition & 1 deletion earthaccess/kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def consolidate_metadata(
) from e

if access == "direct":
fs = earthaccess.get_s3fs_session(provider=granules[0]["meta"]["provider-id"])
fs = earthaccess.get_s3_filesystem(provider=granules[0]["meta"]["provider-id"])
else:
fs = earthaccess.get_fsspec_https_session()

Expand Down
33 changes: 27 additions & 6 deletions earthaccess/store.py
Sherwin-14 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import s3fs
from multimethod import multimethod as singledispatchmethod
from pqdm.threads import pqdm
from typing_extensions import deprecated

import earthaccess

Expand Down Expand Up @@ -197,6 +198,7 @@ def set_requests_session(
else:
resp.raise_for_status()

@deprecated("Use get_s3_filesystem instead")
def get_s3fs_session(
self,
daac: Optional[str] = None,
Expand All @@ -206,6 +208,25 @@ def get_s3fs_session(
) -> s3fs.S3FileSystem:
"""Returns a s3fs instance for a given cloud provider / DAAC.

Parameters:
daac: any of the DAACs, e.g. NSIDC, PODAAC
provider: a data provider if we know them, e.g. PODAAC -> POCLOUD
endpoint: pass the URL for the credentials directly

Returns:
An `s3fs.S3FileSystem` authenticated for reading in-region in us-west-2 for 1 hour.
"""
return self.get_s3_filesystem(daac, concept_id, provider, endpoint)

def get_s3_filesystem(
self,
daac: Optional[str] = None,
concept_id: Optional[str] = None,
provider: Optional[str] = None,
endpoint: Optional[str] = None,
) -> s3fs.S3FileSystem:
"""Return an `s3fs.S3FileSystem` instance for a given cloud provider / DAAC.

Parameters:
daac: any of the DAACs, e.g. NSIDC, PODAAC
provider: a data provider if we know them, e.g. PODAAC -> POCLOUD
Expand Down Expand Up @@ -360,10 +381,10 @@ def _open_granules(
endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"])
if endpoint is not None:
logger.info(f"using endpoint: {endpoint}")
s3_fs = self.get_s3fs_session(endpoint=endpoint)
s3_fs = self.get_s3_filesystem(endpoint=endpoint)
else:
logger.info(f"using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
s3_fs = self.get_s3_filesystem(provider=provider)
else:
access = "on_prem"
s3_fs = None
Expand Down Expand Up @@ -416,7 +437,7 @@ def _open_urls(
url_mapping: Mapping[str, None] = {url: None for url in granules}
if self.in_region and granules[0].startswith("s3"):
if provider is not None:
s3_fs = self.get_s3fs_session(provider=provider)
s3_fs = self.get_s3_filesystem(provider=provider)
if s3_fs is not None:
try:
fileset = _open_files(
Expand Down Expand Up @@ -530,7 +551,7 @@ def _get_urls(
)
if self.in_region and data_links[0].startswith("s3"):
logger.info(f"Accessing cloud dataset using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
s3_fs = self.get_s3_filesystem(provider=provider)
# TODO: make this parallel or concurrent
for file in data_links:
s3_fs.get(file, str(local_path))
Expand Down Expand Up @@ -573,10 +594,10 @@ def _get_granules(
logger.info(
f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}"
)
s3_fs = self.get_s3fs_session(endpoint=endpoint)
s3_fs = self.get_s3_filesystem(endpoint=endpoint)
else:
logger.info(f"Accessing cloud dataset using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
s3_fs = self.get_s3_filesystem(provider=provider)

local_path.mkdir(parents=True, exist_ok=True)

Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ def test_get_s3_credentials_lowercase_location(location):


@pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"}))
def test_get_s3fs_session_lowercase_location(location):
def test_get_s3_filesystem_lowercase_location(location):
activate_environment()
earthaccess.login(strategy="environment")
fs = earthaccess.get_s3fs_session(**location)
fs = earthaccess.get_s3_filesystem(**location)
assert isinstance(fs, s3fs.S3FileSystem)
assert all(fs.storage_options[key] for key in ["key", "secret", "token"])
63 changes: 63 additions & 0 deletions tests/unit/test_deprecations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import contextlib
import warnings
from unittest import mock

import earthaccess
import pytest
import responses
from earthaccess.api import get_s3fs_session
from earthaccess.store import Store


@pytest.fixture(scope="session")
@responses.activate
@mock.patch("getpass.getpass")
@mock.patch("builtins.input")
def auth(user_input, user_password):
user_input.return_value = "user"
user_password.return_value = "password"
json_response = [
{"access_token": "EDL-token-1", "expiration_date": "12/15/2021"},
{"access_token": "EDL-token-2", "expiration_date": "12/16/2021"},
]
responses.add(
responses.GET,
"https://urs.earthdata.nasa.gov/api/users/tokens",
json=json_response,
status=200,
)
responses.add(
responses.GET,
"https://urs.earthdata.nasa.gov/profile",
json={"email_address": "[email protected]"},
status=200,
)

earthaccess.login(strategy="interactive")

return earthaccess.__auth__


def test_deprecation_warning_for_api():
with warnings.catch_warnings(record=True) as w:
# Cause all warnings to always be triggered.
warnings.simplefilter("always")
# Trigger a warning.
with contextlib.suppress(AttributeError):
get_s3fs_session()
# Verify some things
assert issubclass(w[0].category, DeprecationWarning)
assert "Use get_s3_filesystem instead" in str(w[0].message)


def test_deprecation_warning_for_store(auth):
store = Store(auth)
with warnings.catch_warnings(record=True) as w:
# Cause all warnings to always be triggered.
warnings.simplefilter("always")
# Trigger a warning.
with contextlib.suppress(ValueError):
store.get_s3fs_session()
# Verify some things
assert issubclass(w[0].category, DeprecationWarning)
assert "Use get_s3_filesystem instead" in str(w[0].message)
8 changes: 4 additions & 4 deletions tests/unit/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ def test_store_can_create_s3_fsspec_session(self):
"OBDAAC",
"ASDC",
]:
s3_fs = store.get_s3fs_session(daac=daac)
s3_fs = store.get_s3_filesystem(daac=daac)
assert isinstance(s3_fs, s3fs.S3FileSystem)
assert s3_fs.storage_options == expected_storage_options

for endpoint in custom_endpoints:
s3_fs = store.get_s3fs_session(endpoint=endpoint)
s3_fs = store.get_s3_filesystem(endpoint=endpoint)
assert isinstance(s3_fs, s3fs.S3FileSystem)
assert s3_fs.storage_options == expected_storage_options

Expand All @@ -120,12 +120,12 @@ def test_store_can_create_s3_fsspec_session(self):
"OB_CLOUD",
"LARC_CLOUD",
]:
s3_fs = store.get_s3fs_session(provider=provider)
s3_fs = store.get_s3_filesystem(provider=provider)
assert isinstance(s3_fs, s3fs.S3FileSystem)
assert s3_fs.storage_options == expected_storage_options

# Ensure informative error is raised
with pytest.raises(ValueError, match="parameters must be specified"):
store.get_s3fs_session()
store.get_s3_filesystem()

return None
Loading