diff --git a/CHANGELOG.md b/CHANGELOG.md index ee595c50..aabad758 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ intended use cases. * [#555](https://github.com/nsidc/earthaccess/issues/555): YAML formatting is now performed with `yamlfmt` instead of `prettier`. + * [#511](https://github.com/nsidc/earthaccess/issues/511): Replaced `print` + calls with `logging` calls where appropriate and added T20 Ruff rule. * Enhancements diff --git a/earthaccess/api.py b/earthaccess/api.py index e2c6aa1e..5e82fd60 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -1,3 +1,5 @@ +import logging + import requests import s3fs from fsspec import AbstractFileSystem @@ -12,6 +14,8 @@ from .system import PROD, System from .utils import _validation as validate +logger = logging.getLogger(__name__) + def _normalize_location(location: Optional[str]) -> Optional[str]: """Handle user-provided `daac` and `provider` values. @@ -64,16 +68,14 @@ def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: ``` """ if not validate.valid_dataset_parameters(**kwargs): - print( - "Warning: a valid set of parameters is needed to search for datasets on CMR" - ) + logger.warn("A valid set of parameters is needed to search for datasets on CMR") return [] if earthaccess.__auth__.authenticated: query = DataCollections(auth=earthaccess.__auth__).parameters(**kwargs) else: query = DataCollections().parameters(**kwargs) datasets_found = query.hits() - print(f"Datasets found: {datasets_found}") + logger.info(f"Datasets found: {datasets_found}") if count > 0: return query.get(count) return query.get_all() @@ -120,7 +122,7 @@ def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]: else: query = DataGranules().parameters(**kwargs) granules_found = query.hits() - print(f"Granules found: {granules_found}") + logger.info(f"Granules found: {granules_found}") if count > 0: return query.get(count) return query.get_all() @@ -199,8 +201,9 @@ def download( try: results = earthaccess.__store__.get(granules, local_path, provider, threads) except AttributeError as err: - print(err) - print("You must call earthaccess.login() before you can download data") + logger.error( + f"{err}: You must call earthaccess.login() before you can download data" + ) return [] return results diff --git a/earthaccess/auth.py b/earthaccess/auth.py index b8995f9d..9c103325 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -177,7 +177,7 @@ def refresh_tokens(self) -> bool: ) return True else: - print(resp_tokens) + logger.info(resp_tokens) return False return False @@ -222,10 +222,10 @@ def get_s3_credentials( cumulus_resp.url, allow_redirects=True, timeout=15 ) if not (auth_resp.ok): - print( + logger.error( f"Authentication with Earthdata Login failed with:\n{auth_resp.text[0:1000]}" ) - print( + logger.error( f"Consider accepting the EULAs available at {self._eula_url} and applications at {self._apps_url}" ) return {} @@ -235,10 +235,12 @@ def get_s3_credentials( else: # This happens if the cloud provider doesn't list the S3 credentials or the DAAC # does not have cloud collections yet - print(f"Credentials for the cloud provider {daac} are not available") + logger.info( + f"Credentials for the cloud provider {daac} are not available" + ) return {} else: - print("We need to authenticate with EDL first") + logger.info("We need to authenticate with EDL first") return {} def get_session(self, bearer_token: bool = True) -> requests.Session: @@ -266,7 +268,7 @@ def _interactive(self, persist_credentials: bool = False) -> bool: if authenticated: logger.debug("Using user provided credentials for EDL") if persist_credentials: - print("Persisting credentials to .netrc") + logger.info("Persisting credentials to .netrc") self._persist_user_credentials(username, password) return authenticated @@ -307,7 +309,7 @@ def _get_credentials( token_resp = self._get_user_tokens(username, password) if not (token_resp.ok): # type: ignore - print( + logger.info( f"Authentication with Earthdata Login failed with:\n{token_resp.text}" ) return False @@ -376,7 +378,7 @@ def _persist_user_credentials(self, username: str, password: str) -> bool: netrc_path.touch(exist_ok=True) netrc_path.chmod(0o600) except Exception as e: - print(e) + logger.error(e) return False my_netrc = Netrc(str(netrc_path)) my_netrc[self.system.edl_hostname] = { diff --git a/earthaccess/search.py b/earthaccess/search.py index 9f73af14..13f8c315 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -1,4 +1,5 @@ import datetime as dt +import logging from inspect import getmembers, ismethod import requests @@ -21,6 +22,8 @@ from .daac import find_provider, find_provider_by_shortname from .results import DataCollection, DataGranule +logger = logging.getLogger(__name__) + FloatLike: TypeAlias = Union[str, SupportsFloat] PointLike: TypeAlias = Tuple[FloatLike, FloatLike] @@ -306,8 +309,8 @@ def parameters(self, **kwargs: Any) -> Self: def print_help(self, method: str = "fields") -> None: """Prints the help information for a given method.""" - print("Class components: \n") - print([method for method in dir(self) if method.startswith("_") is False]) + print("Class components: \n") # noqa: T201 + print([method for method in dir(self) if method.startswith("_") is False]) # noqa: T201 help(getattr(self, method)) def fields(self, fields: Optional[List[str]] = None) -> Self: @@ -978,7 +981,7 @@ def doi(self, doi: str) -> Self: else: # TODO consider removing this print statement since we don't print such # a message in other cases where no results are found. Seems arbitrary. - print( + logger.info( f"earthaccess couldn't find any associated collections with the DOI: {doi}" ) diff --git a/earthaccess/store.py b/earthaccess/store.py index e56e9849..fa0e8a99 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -1,4 +1,5 @@ import datetime +import logging import shutil import traceback from functools import lru_cache @@ -21,6 +22,8 @@ from .results import DataGranule from .search import DataCollections +logger = logging.getLogger(__name__) + class EarthAccessFile(fsspec.spec.AbstractBufferedFile): def __init__(self, f: fsspec.AbstractFileSystem, granule: DataGranule) -> None: @@ -110,7 +113,7 @@ def __init__(self, auth: Any, pre_authorize: bool = False) -> None: self.set_requests_session(url) else: - print("Warning: the current session is not authenticated with NASA") + logger.warn("The current session is not authenticated with NASA") self.auth = None self.in_region = self._running_in_us_west_2() @@ -339,7 +342,7 @@ def _open_granules( ) -> List[Any]: fileset: List = [] total_size = round(sum([granule.size() for granule in granules]) / 1024, 2) - print(f"Opening {len(granules)} granules, approx size: {total_size} GB") + logger.info(f"Opening {len(granules)} granules, approx size: {total_size} GB") if self.auth is None: raise ValueError( @@ -353,10 +356,10 @@ def _open_granules( # if the data has its own S3 credentials endpoint, we will use it endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"]) if endpoint is not None: - print(f"using endpoint: {endpoint}") + logger.info(f"using endpoint: {endpoint}") s3_fs = self.get_s3fs_session(endpoint=endpoint) else: - print(f"using provider: {provider}") + logger.info(f"using provider: {provider}") s3_fs = self.get_s3fs_session(provider=provider) else: access = "on_prem" @@ -425,7 +428,7 @@ def _open_urls( f"Exception: {traceback.format_exc()}" ) from e else: - print(f"Provider {provider} has no valid cloud credentials") + logger.info(f"Provider {provider} has no valid cloud credentials") return fileset else: raise ValueError( @@ -523,13 +526,13 @@ def _get_urls( "we need to use one from earthaccess.list_cloud_providers()" ) if self.in_region and data_links[0].startswith("s3"): - print(f"Accessing cloud dataset using provider: {provider}") + logger.info(f"Accessing cloud dataset using provider: {provider}") s3_fs = self.get_s3fs_session(provider=provider) # TODO: make this parallel or concurrent for file in data_links: s3_fs.get(file, str(local_path)) file_name = local_path / Path(file).name - print(f"Downloaded: {file_name}") + logger.info(f"Downloaded: {file_name}") downloaded_files.append(file_name) return downloaded_files @@ -559,17 +562,17 @@ def _get_granules( ) ) total_size = round(sum([granule.size() for granule in granules]) / 1024, 2) - print( + logger.info( f" Getting {len(granules)} granules, approx download size: {total_size} GB" ) if access == "direct": if endpoint is not None: - print( + logger.info( f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}" ) s3_fs = self.get_s3fs_session(endpoint=endpoint) else: - print(f"Accessing cloud dataset using provider: {provider}") + logger.info(f"Accessing cloud dataset using provider: {provider}") s3_fs = self.get_s3fs_session(provider=provider) local_path.mkdir(parents=True, exist_ok=True) @@ -578,7 +581,7 @@ def _get_granules( for file in data_links: s3_fs.get(file, str(local_path)) file_name = local_path / Path(file).name - print(f"Downloaded: {file_name}") + logger.info(f"Downloaded: {file_name}") downloaded_files.append(file_name) return downloaded_files else: @@ -615,11 +618,10 @@ def _download_file(self, url: str, directory: Path) -> str: # https://docs.python-requests.org/en/latest/user/quickstart/#raw-response-content shutil.copyfileobj(r.raw, f, length=1024 * 1024) except Exception: - print(f"Error while downloading the file {local_filename}") - print(traceback.format_exc()) + logger.exception(f"Error while downloading the file {local_filename}") raise Exception else: - print(f"File {local_filename} already downloaded") + logger.info(f"File {local_filename} already downloaded") return str(path) def _download_onprem_granules( @@ -663,8 +665,7 @@ def _open_urls_https( try: fileset = _open_files(url_mapping, https_fs, threads) except Exception: - print( - "An exception occurred while trying to access remote files via HTTPS: " - f"Exception: {traceback.format_exc()}" + logger.exception( + "An exception occurred while trying to access remote files via HTTPS" ) return fileset diff --git a/pyproject.toml b/pyproject.toml index a5eb13f7..f1d38a74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,7 +125,7 @@ line-length = 88 src = ["earthaccess", "stubs", "tests"] [tool.ruff.lint] -extend-select = ["I"] +extend-select = ["I", "T20"] [tool.ruff.lint.isort] combine-as-imports = true diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index a4879d12..aef60299 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -59,10 +59,9 @@ def test_auth_can_read_from_netrc_file(): def test_auth_throws_exception_if_netrc_is_not_present(): activate_environment() delete_netrc() - with pytest.raises(Exception) as e_info: + with pytest.raises(Exception): earthaccess.login(strategy="netrc") assertions.assertRaises(FileNotFoundError) - print(e_info) def test_auth_populates_attrs(): @@ -87,12 +86,12 @@ def test_auth_can_fetch_s3_credentials(): for daac in earthaccess.daac.DAACS: if len(daac["s3-credentials"]) > 0: try: - print(f"Testing S3 credentials for {daac['short-name']}") + logger.info(f"Testing S3 credentials for {daac['short-name']}") credentials = earthaccess.get_s3_credentials(daac["short-name"]) assertions.assertIsInstance(credentials, dict) assertions.assertTrue("accessKeyId" in credentials) except Exception as e: - print( + logger.error( f"An error occured while trying to fetch S3 credentials for {daac['short-name']}: {e}" ) diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index dbed96b1..8c2b9be5 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -1,4 +1,5 @@ # package imports +import logging import unittest from unittest import mock @@ -6,6 +7,8 @@ import responses from earthaccess import Auth +logger = logging.getLogger(__name__) + class TestCreateAuth(unittest.TestCase): @responses.activate @@ -107,7 +110,6 @@ def test_auth_fails_for_wrong_credentials(self, user_input, user_password): auth = Auth() auth.login(strategy="interactive") with pytest.raises(Exception) as e_info: - print(e_info) self.assertEqual(auth.authenticated, False) self.assertEqual(e_info, Exception) self.assertEqual(auth.password, "password")