Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace print statements with logger #566

Merged
merged 22 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
intended use cases.
* [#555](https://github.com/nsidc/earthaccess/issues/555): YAML formatting is
now performed with `yamlfmt` instead of `prettier`.
* [#511](https://github.com/nsidc/earthaccess/issues/511): Replaced `print`
calls with `logging` calls, where appropriate and added T20 Ruff rule.
mfisher87 marked this conversation as resolved.
Show resolved Hide resolved

* Enhancements

Expand Down
17 changes: 10 additions & 7 deletions earthaccess/api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

import requests
import s3fs
from fsspec import AbstractFileSystem
Expand All @@ -12,6 +14,8 @@
from .system import PROD, System
from .utils import _validation as validate

logger = logging.getLogger(__name__)


def _normalize_location(location: Optional[str]) -> Optional[str]:
"""Handle user-provided `daac` and `provider` values.
Expand Down Expand Up @@ -64,16 +68,14 @@ def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]:
```
"""
if not validate.valid_dataset_parameters(**kwargs):
print(
"Warning: a valid set of parameters is needed to search for datasets on CMR"
)
logger.warn("A valid set of parameters is needed to search for datasets on CMR")
return []
if earthaccess.__auth__.authenticated:
query = DataCollections(auth=earthaccess.__auth__).parameters(**kwargs)
else:
query = DataCollections().parameters(**kwargs)
datasets_found = query.hits()
print(f"Datasets found: {datasets_found}")
logger.info(f"Datasets found: {datasets_found}")
if count > 0:
return query.get(count)
return query.get_all()
Expand Down Expand Up @@ -120,7 +122,7 @@ def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]:
else:
query = DataGranules().parameters(**kwargs)
granules_found = query.hits()
print(f"Granules found: {granules_found}")
logger.info(f"Granules found: {granules_found}")
if count > 0:
return query.get(count)
return query.get_all()
Expand Down Expand Up @@ -199,8 +201,9 @@ def download(
try:
results = earthaccess.__store__.get(granules, local_path, provider, threads)
except AttributeError as err:
print(err)
print("You must call earthaccess.login() before you can download data")
logger.error(
f"{err}: You must call earthaccess.login() before you can download data"
)
return []
return results

Expand Down
18 changes: 10 additions & 8 deletions earthaccess/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def refresh_tokens(self) -> bool:
)
return True
else:
print(resp_tokens)
logger.info(resp_tokens)
return False

return False
Expand Down Expand Up @@ -222,10 +222,10 @@ def get_s3_credentials(
cumulus_resp.url, allow_redirects=True, timeout=15
)
if not (auth_resp.ok):
print(
logger.error(
f"Authentication with Earthdata Login failed with:\n{auth_resp.text[0:1000]}"
)
print(
logger.error(
f"Consider accepting the EULAs available at {self._eula_url} and applications at {self._apps_url}"
)
return {}
Expand All @@ -235,10 +235,12 @@ def get_s3_credentials(
else:
# This happens if the cloud provider doesn't list the S3 credentials or the DAAC
# does not have cloud collections yet
print(f"Credentials for the cloud provider {daac} are not available")
logger.info(
f"Credentials for the cloud provider {daac} are not available"
)
return {}
else:
print("We need to authenticate with EDL first")
logger.info("We need to authenticate with EDL first")
return {}

def get_session(self, bearer_token: bool = True) -> requests.Session:
Expand Down Expand Up @@ -266,7 +268,7 @@ def _interactive(self, persist_credentials: bool = False) -> bool:
if authenticated:
logger.debug("Using user provided credentials for EDL")
if persist_credentials:
print("Persisting credentials to .netrc")
logger.info("Persisting credentials to .netrc")
self._persist_user_credentials(username, password)
return authenticated

Expand Down Expand Up @@ -307,7 +309,7 @@ def _get_credentials(
token_resp = self._get_user_tokens(username, password)

if not (token_resp.ok): # type: ignore
print(
logger.info(
f"Authentication with Earthdata Login failed with:\n{token_resp.text}"
)
return False
Expand Down Expand Up @@ -376,7 +378,7 @@ def _persist_user_credentials(self, username: str, password: str) -> bool:
netrc_path.touch(exist_ok=True)
netrc_path.chmod(0o600)
except Exception as e:
print(e)
logger.error(e)
return False
my_netrc = Netrc(str(netrc_path))
my_netrc[self.system.edl_hostname] = {
Expand Down
9 changes: 6 additions & 3 deletions earthaccess/search.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime as dt
import logging
from inspect import getmembers, ismethod

import requests
Expand All @@ -21,6 +22,8 @@
from .daac import find_provider, find_provider_by_shortname
from .results import DataCollection, DataGranule

logger = logging.getLogger(__name__)

FloatLike: TypeAlias = Union[str, SupportsFloat]
PointLike: TypeAlias = Tuple[FloatLike, FloatLike]

Expand Down Expand Up @@ -306,8 +309,8 @@ def parameters(self, **kwargs: Any) -> Self:

def print_help(self, method: str = "fields") -> None:
"""Prints the help information for a given method."""
print("Class components: \n")
print([method for method in dir(self) if method.startswith("_") is False])
print("Class components: \n") # noqa: T201
print([method for method in dir(self) if method.startswith("_") is False]) # noqa: T201
help(getattr(self, method))

def fields(self, fields: Optional[List[str]] = None) -> Self:
Expand Down Expand Up @@ -978,7 +981,7 @@ def doi(self, doi: str) -> Self:
else:
# TODO consider removing this print statement since we don't print such
# a message in other cases where no results are found. Seems arbitrary.
print(
logger.info(
f"earthaccess couldn't find any associated collections with the DOI: {doi}"
)

Expand Down
35 changes: 18 additions & 17 deletions earthaccess/store.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import logging
import shutil
import traceback
from functools import lru_cache
Expand All @@ -21,6 +22,8 @@
from .results import DataGranule
from .search import DataCollections

logger = logging.getLogger(__name__)


class EarthAccessFile(fsspec.spec.AbstractBufferedFile):
def __init__(self, f: fsspec.AbstractFileSystem, granule: DataGranule) -> None:
Expand Down Expand Up @@ -110,7 +113,7 @@ def __init__(self, auth: Any, pre_authorize: bool = False) -> None:
self.set_requests_session(url)

else:
print("Warning: the current session is not authenticated with NASA")
logger.warn("The current session is not authenticated with NASA")
self.auth = None
self.in_region = self._running_in_us_west_2()

Expand Down Expand Up @@ -339,7 +342,7 @@ def _open_granules(
) -> List[Any]:
fileset: List = []
total_size = round(sum([granule.size() for granule in granules]) / 1024, 2)
print(f"Opening {len(granules)} granules, approx size: {total_size} GB")
logger.info(f"Opening {len(granules)} granules, approx size: {total_size} GB")

if self.auth is None:
raise ValueError(
Expand All @@ -353,10 +356,10 @@ def _open_granules(
# if the data has its own S3 credentials endpoint, we will use it
endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"])
if endpoint is not None:
print(f"using endpoint: {endpoint}")
logger.info(f"using endpoint: {endpoint}")
s3_fs = self.get_s3fs_session(endpoint=endpoint)
else:
print(f"using provider: {provider}")
logger.info(f"using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
else:
access = "on_prem"
Expand Down Expand Up @@ -425,7 +428,7 @@ def _open_urls(
f"Exception: {traceback.format_exc()}"
) from e
else:
print(f"Provider {provider} has no valid cloud credentials")
logger.info(f"Provider {provider} has no valid cloud credentials")
return fileset
else:
raise ValueError(
Expand Down Expand Up @@ -523,13 +526,13 @@ def _get_urls(
"we need to use one from earthaccess.list_cloud_providers()"
)
if self.in_region and data_links[0].startswith("s3"):
print(f"Accessing cloud dataset using provider: {provider}")
logger.info(f"Accessing cloud dataset using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)
# TODO: make this parallel or concurrent
for file in data_links:
s3_fs.get(file, str(local_path))
file_name = local_path / Path(file).name
print(f"Downloaded: {file_name}")
logger.info(f"Downloaded: {file_name}")
downloaded_files.append(file_name)
return downloaded_files

Expand Down Expand Up @@ -559,17 +562,17 @@ def _get_granules(
)
)
total_size = round(sum([granule.size() for granule in granules]) / 1024, 2)
print(
logger.info(
f" Getting {len(granules)} granules, approx download size: {total_size} GB"
)
if access == "direct":
if endpoint is not None:
print(
logger.info(
f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}"
)
s3_fs = self.get_s3fs_session(endpoint=endpoint)
else:
print(f"Accessing cloud dataset using provider: {provider}")
logger.info(f"Accessing cloud dataset using provider: {provider}")
s3_fs = self.get_s3fs_session(provider=provider)

local_path.mkdir(parents=True, exist_ok=True)
Expand All @@ -578,7 +581,7 @@ def _get_granules(
for file in data_links:
s3_fs.get(file, str(local_path))
file_name = local_path / Path(file).name
print(f"Downloaded: {file_name}")
logger.info(f"Downloaded: {file_name}")
downloaded_files.append(file_name)
return downloaded_files
else:
Expand Down Expand Up @@ -615,11 +618,10 @@ def _download_file(self, url: str, directory: Path) -> str:
# https://docs.python-requests.org/en/latest/user/quickstart/#raw-response-content
shutil.copyfileobj(r.raw, f, length=1024 * 1024)
except Exception:
print(f"Error while downloading the file {local_filename}")
print(traceback.format_exc())
logger.exception(f"Error while downloading the file {local_filename}")
raise Exception
else:
print(f"File {local_filename} already downloaded")
logger.info(f"File {local_filename} already downloaded")
return str(path)

def _download_onprem_granules(
Expand Down Expand Up @@ -663,8 +665,7 @@ def _open_urls_https(
try:
fileset = _open_files(url_mapping, https_fs, threads)
except Exception:
print(
"An exception occurred while trying to access remote files via HTTPS: "
f"Exception: {traceback.format_exc()}"
logger.exception(
"An exception occurred while trying to access remote files via HTTPS"
)
return fileset
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ line-length = 88
src = ["earthaccess", "stubs", "tests"]

[tool.ruff.lint]
extend-select = ["I"]
extend-select = ["I", "T20"]

[tool.ruff.lint.isort]
combine-as-imports = true
Expand Down
7 changes: 3 additions & 4 deletions tests/integration/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,9 @@ def test_auth_can_read_from_netrc_file():
def test_auth_throws_exception_if_netrc_is_not_present():
activate_environment()
delete_netrc()
with pytest.raises(Exception) as e_info:
with pytest.raises(Exception):
earthaccess.login(strategy="netrc")
assertions.assertRaises(FileNotFoundError)
print(e_info)


def test_auth_populates_attrs():
Expand All @@ -87,12 +86,12 @@ def test_auth_can_fetch_s3_credentials():
for daac in earthaccess.daac.DAACS:
if len(daac["s3-credentials"]) > 0:
try:
print(f"Testing S3 credentials for {daac['short-name']}")
logger.info(f"Testing S3 credentials for {daac['short-name']}")
credentials = earthaccess.get_s3_credentials(daac["short-name"])
assertions.assertIsInstance(credentials, dict)
assertions.assertTrue("accessKeyId" in credentials)
except Exception as e:
print(
logger.error(
f"An error occured while trying to fetch S3 credentials for {daac['short-name']}: {e}"
)
Comment on lines 88 to 96
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't be logging messages from test functions. In fact, I suspect the try/except should be removed. If an exception occurs during the test, the test should fail. However, I don't know if this is an exception to that rule (no pun intended).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

botanical@64ec135 it seems intentional to me according to this commit 😯

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, I suspect the try/except should be removed.

it seems intentional to me according to this commit 😯

It does still seem weird. But let's be intentional about the scope of this PR :) Can we take this to an issue / discussion?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, I suspect the try/except should be removed.

it seems intentional to me according to this commit 😯

It does still seem weird. But let's be intentional about the scope of this PR :) Can we take this to an issue / discussion?

Sounds good to me. @botanical, would you mind opening a separate issue for this? Also, would you open yet another issue about refactoring loops in tests to instead be parametrized tests (pytest.parametrize, and yes, that is how it's spelled, which is apparently a valid variant of parameterize).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, that is how it's spelled, which is apparently a valid variant of parameterize

Gets me every time

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


Expand Down
4 changes: 3 additions & 1 deletion tests/unit/test_auth.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# package imports
import logging
import unittest
from unittest import mock

import pytest
import responses
from earthaccess import Auth

logger = logging.getLogger(__name__)


class TestCreateAuth(unittest.TestCase):
@responses.activate
Expand Down Expand Up @@ -107,7 +110,6 @@ def test_auth_fails_for_wrong_credentials(self, user_input, user_password):
auth = Auth()
auth.login(strategy="interactive")
with pytest.raises(Exception) as e_info:
print(e_info)
self.assertEqual(auth.authenticated, False)
self.assertEqual(e_info, Exception)
self.assertEqual(auth.password, "password")
Expand Down
Loading