From f5adfaad97684806bf84d2455039e38fc1d6cc6a Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Mon, 16 Sep 2024 19:58:48 -0400 Subject: [PATCH 1/2] Use temp .netrc file for integration tests Also, support `NETRC` environment variable for specifying alternative location of `.netrc` file. Fixes #806 Fixes #743 Fixes #480 --- .github/workflows/integration-test.yml | 32 +++++++-- .gitignore | 1 + CHANGELOG.md | 9 +++ docs/howto/authenticate.md | 62 ++++++++++------ earthaccess/__init__.py | 4 +- earthaccess/auth.py | 53 +++++++++++--- earthaccess/kerchunk.py | 34 +++++---- scripts/integration-test.sh | 2 +- tests/integration/conftest.py | 38 ++++++++++ tests/integration/test_api.py | 46 +++++------- tests/integration/test_auth.py | 86 +++++++---------------- tests/integration/test_cloud_download.py | 45 ++++-------- tests/integration/test_cloud_open.py | 38 +++------- tests/integration/test_kerchunk.py | 20 +----- tests/integration/test_onprem_download.py | 68 +++++++----------- tests/integration/test_onprem_open.py | 58 ++++++--------- 16 files changed, 297 insertions(+), 299 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index e0e3ca07..360f71ed 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -1,7 +1,10 @@ name: Integration Tests on: - pull_request: + pull_request_target: + types: + - opened + - synchronize push: branches: - main @@ -30,14 +33,33 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v4 + - name: Fetch user permission + id: permission + uses: actions-cool/check-user-permission@v2 + with: + require: write + username: ${{ github.triggering_actor }} + - name: Check user permission + if: steps.permission.outputs.require-result == 'false' + # If the triggering actor does not have write permission (i.e., this is a + # PR from a fork), then we exit, otherwise most of the integration tests will + # fail because they require access to secrets. 
In this case, a maintainer + # will need to make sure the PR looks safe, and if so, manually re-run the + # failed actions. + run: | + echo "User ${{ github.triggering_actor }} does not have permission to run integration tests." + echo "Current permission level is ${{ steps.permission.outputs.user-permission }}." + echo "Job originally triggered by ${{ github.actor }}." + exit 1 + - name: Checkout source + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Get full python version id: full-python-version - run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") + run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT - name: Install poetry uses: abatilo/actions-poetry@v3 - name: Configure poetry @@ -56,12 +78,12 @@ jobs: run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv - name: Install Dependencies if: ${{ !env.ACT }} - run: poetry install + run: poetry install --quiet --extras kerchunk - name: Install Dependencies if: ${{ env.ACT }} # When using `act` to run the workflow locally, the `poetry install` command # may fail due to network issues when running multiple Docker containers. 
- run: poetry install || poetry install || poetry install + run: poetry install --quiet --extras kerchunk || poetry install --quiet --extras kerchunk || poetry install --quiet --extras kerchunk - name: Test env: EARTHDATA_USERNAME: ${{ secrets.EDL_USERNAME }} diff --git a/.gitignore b/.gitignore index 356afb4f..e2236a54 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ htmlcov dist site .coverage +.coverage.* coverage.xml .netlify test.db diff --git a/CHANGELOG.md b/CHANGELOG.md index cb5a09a0..3f620bda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ instead ([#766](https://github.com/nsidc/earthaccess/issues/766)) ([**@Sherwin-14**](https://github.com/Sherwin-14), [**@chuckwondo**](https://github.com/chuckwondo)) +- Use built-in `assert` statement in integration tests + ([#743](https://github.com/nsidc/earthaccess/issues/743)) + ([**@chuckwondo**](https://github.com/chuckwondo)) ### Added @@ -25,12 +28,18 @@ [**@chuckwondo**](https://github.com/chuckwondo), [**@mfisher87**](https://github.com/mfisher87), [**@betolink**](https://github.com/betolink)) +- Support use of `NETRC` environment variable to override default `.netrc` file + location ([#480](https://github.com/nsidc/earthaccess/issues/480)) + ([**@chuckwondo**](https://github.com/chuckwondo)) ### Fixed - Removed Broken Link "Introduction to NASA earthaccess" ([#779](https://github.com/nsidc/earthaccess/issues/779)) ([**@Sherwin-14**](https://github.com/Sherwin-14)) +- Integration tests no longer clobber existing `.netrc` file + ([#806](https://github.com/nsidc/earthaccess/issues/806)) + ([**@chuckwondo**](https://github.com/chuckwondo)) ## [0.10.0] 2024-07-19 diff --git a/docs/howto/authenticate.md b/docs/howto/authenticate.md index 407e5140..ec94f5ea 100644 --- a/docs/howto/authenticate.md +++ b/docs/howto/authenticate.md @@ -1,10 +1,14 @@ -## Authenticate with Earthdata Login +# Authenticate with Earthdata Login -The first step to use NASA Earthdata is to create an account with 
Earthdata Login, please follow the instructions at [NASA EDL](https://urs.earthdata.nasa.gov/) +The first step to use NASA Earthdata is to create an account with Earthdata +Login. Please follow the instructions at +[NASA EDL](https://urs.earthdata.nasa.gov/). -Once registered, earthaccess can use environment variables, a `.netrc` file or interactive input from a user to login with NASA EDL. +Once registered, earthaccess can use environment variables, a `.netrc` file or +interactive input from a user to login with NASA EDL. -If a strategy is not especified, env vars will be used first, then netrc and finally user's input. +If a strategy is not specified, environment variables will be used first, then +a `.netrc` (if found, see below), and finally a user's input. ```py import earthaccess @@ -12,35 +16,48 @@ import earthaccess auth = earthaccess.login() ``` -If you have a .netrc file with your Earthdata Login credentials +If you have a `.netrc` file (see below) with your Earthdata Login credentials, +you can explicitly specify its use: ```py auth = earthaccess.login(strategy="netrc") ``` -If your Earthdata Login credentials are set as environment variables: EARTHDATA_USERNAME, EARTHDATA_PASSWORD +If your Earthdata Login credentials are set as the environment variables +`EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD`, you can explicitly specify their +use: ```py auth = earthaccess.login(strategy="environment") ``` -If you wish to enter your Earthdata Login credentials when prompted with optional persistence to .netrc +If you wish to enter your Earthdata Login credentials when prompted, with +optional persistence to your `.netrc` file (see below), specify the interactive +strategy: ```py auth = earthaccess.login(strategy="interactive", persist=True) ``` +## Authentication +By default, `earthaccess` will automatically look for your EDL account +credentials in two locations: -### **Authentication** +1. 
A `.netrc` file: By default, this is either `~/_netrc` (on a Windows system) + or `~/.netrc` (on a non-Windows system). On *any* system, you may override + the default location by setting the `NETRC` environment variable to the path + of your desired `.netrc` file. -By default, `earthaccess` with automatically look for your EDL account credentials in two locations: - -1. A `~/.netrc` file + **NOTE**: When setting the `NETRC` environment variable, there is no + requirement to use a specific filename. The name `.netrc` is common, but is + used throughout this documentation primarily for convenience. The only + requirement is that the *contents* of the file adhere to the + [`.netrc` file format](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html). 2. `EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD` environment variables -If neither of these options are configured, you can authenticate by calling the `earthaccess.login()` method -and manually entering your EDL account credentials. +If neither of these options is configured, you can authenticate by calling the +`earthaccess.login()` method and manually entering your EDL account credentials. ```python import earthaccess @@ -48,27 +65,26 @@ import earthaccess earthaccess.login() ``` -Note you can pass `persist=True` to `earthaccess.login()` to have the EDL account credentials you enter -automatically saved to a `~/.netrc` file for future use. - +Note you can pass `persist=True` to `earthaccess.login()` to have the EDL +account credentials you enter automatically saved to your `.netrc` file (see +above) for future use. Once you are authenticated with NASA EDL you can: * Get a file from a DAAC using a `fsspec` session. -* Request temporary S3 credentials from a particular DAAC (needed to download or stream data from an S3 bucket in the cloud). +* Request temporary S3 credentials from a particular DAAC (needed to download or + stream data from an S3 bucket in the cloud). 
* Use the library to download or stream data directly from S3. * Regenerate CMR tokens (used for restricted datasets). +## Earthdata User Acceptance Testing (UAT) environment -### Earthdata User Acceptance Testing (UAT) environment - -If your EDL account is authorized to access the User Acceptance Testing (UAT) system, -you can set earthaccess to work with its EDL and CMR endpoints -by setting the `system` argument at login, as follows: +If your EDL account is authorized to access the User Acceptance Testing (UAT) +system, you can set earthaccess to work with its EDL and CMR endpoints by +setting the `system` argument at login, as follows: ```python import earthaccess earthaccess.login(system=earthaccess.UAT) - ``` diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index 6d7d0def..c82c23d3 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -21,7 +21,7 @@ ) from .auth import Auth from .kerchunk import consolidate_metadata -from .search import DataCollections, DataGranules +from .search import DataCollection, DataCollections, DataGranule, DataGranules from .services import DataServices from .store import Store from .system import PROD, UAT @@ -46,7 +46,9 @@ "download", "auth_environ", # search.py + "DataGranule", "DataGranules", + "DataCollection", "DataCollections", "DataServices", # auth.py diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 4332379a..3a3b209c 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -25,6 +25,24 @@ logger = logging.getLogger(__name__) +def netrc_path() -> Path: + """Return the path of the `.netrc` file. + + The path may or may not exist. + + See [the `.netrc` file](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html). + + Returns: + `Path` of the `NETRC` environment variable, if the value is non-empty; + otherwise, the path of the platform-specific default location: + `~/_netrc` on Windows systems, `~/.netrc` on non-Windows systems. 
+ """ + sys_netrc_name = "_netrc" if platform.system() == "Windows" else ".netrc" + env_netrc = os.environ.get("NETRC") + + return Path(env_netrc) if env_netrc else Path.home() / sys_netrc_name + + class SessionWithHeaderRedirection(requests.Session): """Requests removes auth headers if the redirect happens outside the original req domain. @@ -104,11 +122,12 @@ def login( if self.authenticated and (system == self.system): logger.debug("We are already authenticated with NASA EDL") return self + if strategy == "interactive": self._interactive(persist) - if strategy == "netrc": + elif strategy == "netrc": self._netrc() - if strategy == "environment": + elif strategy == "environment": self._environment() return self @@ -222,25 +241,29 @@ def _interactive(self, persist_credentials: bool = False) -> bool: if authenticated: logger.debug("Using user provided credentials for EDL") if persist_credentials: - logger.info("Persisting credentials to .netrc") self._persist_user_credentials(username, password) return authenticated def _netrc(self) -> bool: + netrc_loc = netrc_path() + try: - my_netrc = Netrc() + my_netrc = Netrc(str(netrc_loc)) except FileNotFoundError as err: - raise FileNotFoundError(f"No .netrc found in {Path.home()}") from err + raise FileNotFoundError(f"No .netrc found at {netrc_loc}") from err except NetrcParseError as err: - raise NetrcParseError("Unable to parse .netrc") from err + raise NetrcParseError(f"Unable to parse .netrc file {netrc_loc}") from err + if (creds := my_netrc[self.system.edl_hostname]) is None: return False username = creds["login"] password = creds["password"] authenticated = self._get_credentials(username, password) + if authenticated: logger.debug("Using .netrc file for EDL") + return authenticated def _environment(self) -> bool: @@ -293,33 +316,41 @@ def _find_or_create_token(self, username: str, password: str) -> Any: def _persist_user_credentials(self, username: str, password: str) -> bool: # See: 
https://github.com/sloria/tinynetrc/issues/34 + + netrc_loc = netrc_path() + logger.info(f"Persisting credentials to {netrc_loc}") + try: - netrc_path = Path().home().joinpath(".netrc") - netrc_path.touch(exist_ok=True) - netrc_path.chmod(0o600) + netrc_loc.touch(exist_ok=True) + netrc_loc.chmod(0o600) except Exception as e: logger.error(e) return False - my_netrc = Netrc(str(netrc_path)) + + my_netrc = Netrc(str(netrc_loc)) my_netrc[self.system.edl_hostname] = { "login": username, "password": password, } my_netrc.save() + urs_cookies_path = Path.home() / ".urs_cookies" + if not urs_cookies_path.exists(): urs_cookies_path.write_text("") # Create and write to .dodsrc file dodsrc_path = Path.home() / ".dodsrc" + if not dodsrc_path.exists(): dodsrc_contents = ( - f"HTTP.COOKIEJAR={urs_cookies_path}\nHTTP.NETRC={netrc_path}" + f"HTTP.COOKIEJAR={urs_cookies_path}\nHTTP.NETRC={netrc_loc}" ) dodsrc_path.write_text(dodsrc_contents) if platform.system() == "Windows": local_dodsrc_path = Path.cwd() / dodsrc_path.name + if not local_dodsrc_path.exists(): shutil.copy2(dodsrc_path, local_dodsrc_path) diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py index 26758184..9ee40dec 100644 --- a/earthaccess/kerchunk.py +++ b/earthaccess/kerchunk.py @@ -1,34 +1,39 @@ from __future__ import annotations +from typing import Optional, Union + import fsspec +import fsspec.utils import s3fs import earthaccess def _get_chunk_metadata( - granule: earthaccess.results.DataGranule, - fs: fsspec.AbstractFileSystem | s3fs.S3FileSystem, + granule: earthaccess.DataGranule, + fs: fsspec.AbstractFileSystem, ) -> list[dict]: from kerchunk.hdf import SingleHdf5ToZarr metadata = [] access = "direct" if isinstance(fs, s3fs.S3FileSystem) else "indirect" + for url in granule.data_links(access=access): with fs.open(url) as inf: h5chunks = SingleHdf5ToZarr(inf, url) m = h5chunks.translate() metadata.append(m) + return metadata def consolidate_metadata( - granules: 
list[earthaccess.results.DataGranule], - kerchunk_options: dict | None = None, + granules: list[earthaccess.DataGranule], + kerchunk_options: Optional[dict] = None, access: str = "direct", - outfile: str | None = None, - storage_options: dict | None = None, -) -> str | dict: + outfile: Optional[str] = None, + storage_options: Optional[dict] = None, +) -> Union[str, dict]: try: import dask @@ -44,15 +49,16 @@ def consolidate_metadata( fs = earthaccess.get_fsspec_https_session() # Get metadata for each granule - get_chunk_metadata = dask.delayed(_get_chunk_metadata) - chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) + get_chunk_metadata = dask.delayed(_get_chunk_metadata) # type: ignore + chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) # type: ignore chunks = sum(chunks, start=[]) # Get combined metadata object mzz = MultiZarrToZarr(chunks, **(kerchunk_options or {})) - if outfile is not None: - output = fsspec.utils.stringify_path(outfile) - mzz.translate(outfile, storage_options=storage_options or {}) - return output - else: + + if outfile is None: return mzz.translate() + + output = fsspec.utils.stringify_path(outfile) + mzz.translate(outfile, storage_options=storage_options or {}) + return output diff --git a/scripts/integration-test.sh b/scripts/integration-test.sh index 7a7b6551..22c8cb66 100755 --- a/scripts/integration-test.sh +++ b/scripts/integration-test.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -x -pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing ${@} --capture=no --tb=native --log-cli-level=INFO +pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing "${@}" --capture=no --tb=native --log-cli-level=INFO RET=$? 
set +x diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c2d4a3c2..db71b54f 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,3 +1,7 @@ +import os +import pathlib + +import earthaccess import pytest ACCEPTABLE_FAILURE_RATE = 10 @@ -29,3 +33,37 @@ def pytest_sessionfinish(session, exitstatus): failure_rate = (100.0 * session.testsfailed) / session.testscollected if failure_rate <= ACCEPTABLE_FAILURE_RATE: session.exitstatus = 99 + + +@pytest.fixture +def mock_env(monkeypatch): + earthaccess.__auth__ = earthaccess.Auth() + # the original comes from github secrets + monkeypatch.setenv("EARTHDATA_USERNAME", os.getenv("EARTHACCESS_TEST_USERNAME", "")) + monkeypatch.setenv("EARTHDATA_PASSWORD", os.getenv("EARTHACCESS_TEST_PASSWORD", "")) + + +@pytest.fixture +def mock_missing_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + netrc_path = tmp_path / ".netrc" + monkeypatch.setenv("NETRC", str(netrc_path)) + monkeypatch.delenv("EARTHDATA_USERNAME") + monkeypatch.delenv("EARTHDATA_PASSWORD") + # Currently, due to there being only a single, global, module-level auth + # value, tests using different auth strategies interfere with each other, + # so here we are deleting the auth attribute so that it doesn't interfere. 
+ monkeypatch.delattr(earthaccess, "__auth__", raising=False) + + +@pytest.fixture +def mock_netrc(mock_env, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + netrc = tmp_path / ".netrc" + monkeypatch.setenv("NETRC", str(netrc)) + + username = os.environ["EARTHDATA_USERNAME"] + password = os.environ["EARTHDATA_PASSWORD"] + + netrc.write_text( + f"machine urs.earthdata.nasa.gov login {username} password {password}\n" + ) + netrc.chmod(0o600) diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index 8fd45489..5e90cf46 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -1,21 +1,11 @@ -# package imports import logging import os -import unittest from pathlib import Path import earthaccess import pytest logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") dataset_valid_params = [ @@ -42,36 +32,36 @@ ] -def test_auth_returns_valid_auth_class(): +def test_auth_returns_valid_auth_class(mock_env): auth = earthaccess.login(strategy="environment") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated -def test_dataset_search_returns_none_with_no_parameters(): +def test_dataset_search_returns_none_with_no_parameters(mock_env): results = earthaccess.search_datasets() - assertions.assertIsInstance(results, list) - assertions.assertTrue(len(results) == 0) + assert isinstance(results, list) + assert len(results) == 0 @pytest.mark.parametrize("kwargs", 
dataset_valid_params) -def test_dataset_search_returns_valid_results(kwargs): +def test_dataset_search_returns_valid_results(mock_env, kwargs): results = earthaccess.search_datasets(**kwargs) - assertions.assertIsInstance(results, list) - assertions.assertIsInstance(results[0], dict) + assert isinstance(results, list) + assert isinstance(results[0], dict) @pytest.mark.parametrize("kwargs", granules_valid_params) -def test_granules_search_returns_valid_results(kwargs): +def test_granules_search_returns_valid_results(mock_env, kwargs): results = earthaccess.search_data(count=10, **kwargs) - assertions.assertIsInstance(results, list) - assertions.assertTrue(len(results) <= 10) + assert isinstance(results, list) + assert len(results) <= 10 @pytest.mark.parametrize("selection", [0, slice(None)]) @pytest.mark.parametrize("use_url", [True, False]) -def test_download(tmp_path, selection, use_url): +def test_download(mock_env, tmp_path, selection, use_url): results = earthaccess.search_data( count=2, short_name="ATL08", @@ -80,15 +70,15 @@ def test_download(tmp_path, selection, use_url): ) if use_url: # Download via file URL string instead of DataGranule object - results = [r.data_links(access="indirect") for r in results] - results = sum(results, start=[]) # flatten to a list of strings + results = [link for r in results for link in r.data_links(access="indirect")] result = results[selection] files = earthaccess.download(result, str(tmp_path)) - assertions.assertIsInstance(files, list) + assert isinstance(files, list) assert all(Path(f).exists() for f in files) -def test_auth_environ(): +def test_auth_environ(mock_env): + earthaccess.login(strategy="environment") environ = earthaccess.auth_environ() assert environ == { "EARTHDATA_USERNAME": os.environ["EARTHDATA_USERNAME"], diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index 7c0c1b37..e333e48c 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -1,90 +1,54 @@ 
-# package imports import logging -import os -import pathlib -import unittest import earthaccess +import earthaccess.daac import pytest import requests import s3fs logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") -NETRC_PATH = pathlib.Path.home() / pathlib.Path(".netrc") - -def activate_environment(): - earthaccess.__auth__ = earthaccess.Auth() - # the original comes from github secrets - os.environ["EARTHDATA_USERNAME"] = os.getenv("EARTHACCESS_TEST_USERNAME", "") - os.environ["EARTHDATA_PASSWORD"] = os.getenv("EARTHACCESS_TEST_PASSWORD", "") - - -def activate_netrc(): - activate_environment() - username = os.environ["EARTHDATA_USERNAME"] - password = os.environ["EARTHDATA_PASSWORD"] - - with open(NETRC_PATH, "w") as f: - f.write( - f"machine urs.earthdata.nasa.gov login {username} password {password}\n" - ) - NETRC_PATH.chmod(0o600) - - -def delete_netrc(): - if NETRC_PATH.exists(): - NETRC_PATH.unlink() - - -def test_auth_can_read_earthdata_env_variables(): - activate_environment() +def test_auth_can_read_earthdata_env_variables(mock_env): auth = earthaccess.login(strategy="environment") logger.info(f"Current username: {auth.username}") logger.info(f"earthaccess version: {earthaccess.__version__}") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated -def test_auth_can_read_from_netrc_file(): - activate_netrc() +def test_auth_can_read_from_netrc_file(mock_netrc): auth = earthaccess.login(strategy="netrc") - assertions.assertTrue(auth.authenticated) - delete_netrc() + assert auth.authenticated -def test_auth_throws_exception_if_netrc_is_not_present(): - activate_environment() - delete_netrc() - with pytest.raises(Exception): +def 
test_auth_throws_exception_if_netrc_is_not_present(mock_missing_netrc): + with pytest.raises(FileNotFoundError): earthaccess.login(strategy="netrc") - assertions.assertRaises(FileNotFoundError) -def test_auth_populates_attrs(): - activate_environment() +def test_auth_populates_attrs(mock_env): auth = earthaccess.login(strategy="environment") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated -def test_auth_can_create_authenticated_requests_sessions(): - activate_environment() +def test_auth_can_create_authenticated_requests_sessions(mock_env): session = earthaccess.get_requests_https_session() - assertions.assertTrue("Authorization" in session.headers) - assertions.assertTrue("Bearer" in session.headers["Authorization"]) + assert "Authorization" in session.headers + assert "Bearer" in session.headers["Authorization"] # type: ignore -@pytest.mark.parametrize("daac", earthaccess.daac.DAACS) -def test_auth_can_fetch_s3_credentials(daac): - activate_environment() +@pytest.mark.parametrize( + "daac", [daac for daac in earthaccess.daac.DAACS if daac["s3-credentials"]] +) +def test_auth_can_fetch_s3_credentials(mock_env, daac): auth = earthaccess.login(strategy="environment") assert auth.authenticated + try: credentials = earthaccess.get_s3_credentials(daac["short-name"]) except requests.RequestException as e: @@ -95,10 +59,10 @@ def test_auth_can_fetch_s3_credentials(daac): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) -def test_get_s3_credentials_lowercase_location(location): - activate_environment() +def test_get_s3_credentials_lowercase_location(mock_env, location): earthaccess.login(strategy="environment") creds = 
earthaccess.get_s3_credentials(**location) + assert creds assert all( creds[key] @@ -107,9 +71,9 @@ def test_get_s3_credentials_lowercase_location(location): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) -def test_get_s3_filesystem_lowercase_location(location): - activate_environment() +def test_get_s3_filesystem_lowercase_location(mock_env, location): earthaccess.login(strategy="environment") fs = earthaccess.get_s3_filesystem(**location) + assert isinstance(fs, s3fs.S3FileSystem) assert all(fs.storage_options[key] for key in ["key", "secret", "token"]) diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py index 4e8f9519..8ce144e7 100644 --- a/tests/integration/test_cloud_download.py +++ b/tests/integration/test_cloud_download.py @@ -1,9 +1,6 @@ -# package imports import logging -import os import random import shutil -import unittest from pathlib import Path import earthaccess @@ -56,20 +53,6 @@ }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -83,11 +66,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) 
>= sample_size: @@ -96,7 +77,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): @pytest.mark.parametrize("daac", daac_list) -def test_earthaccess_can_download_cloud_collection_granules(daac): +def test_earthaccess_can_download_cloud_collection_granules(mock_env, tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -109,17 +90,17 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) assert isinstance(granules, list) and len(granules) > 0 - assert isinstance(granules[0], earthaccess.results.DataGranule) - local_path = f"./tests/integration/data/{concept_id}" + assert isinstance(granules[0], earthaccess.DataGranule) granules_to_download, total_size_cmr = get_sample_granules( granules, granules_sample_size, granules_max_size ) @@ -132,14 +113,16 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) - # We are testing this method + path = tmp_path / "tests" / "integration" / "data" / concept_id + path.mkdir(parents=True) + store = Store(Auth().login(strategy="environment")) + try: - store.get(granules_to_download, local_path=local_path) - except Exception: - logger.warning(Exception) + # 
We are testing this method + store.get(granules_to_download, local_path=path) + except Exception as e: + logger.warning(e) - path = Path(local_path) - assert path.is_dir() # test that we downloaded the mb reported by CMR total_mb_downloaded = round( (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2) @@ -156,11 +139,11 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): ) -def test_multi_file_granule(tmp_path): +def test_multi_file_granule(mock_env, tmp_path): # Ensure granules that contain multiple files are handled correctly granules = earthaccess.search_data(short_name="HLSL30", count=1) assert len(granules) == 1 urls = granules[0].data_links() assert len(urls) > 1 files = earthaccess.download(granules, str(tmp_path)) - assert set([Path(f).name for f in urls]) == set([Path(f).name for f in files]) + assert {Path(f).name for f in urls} == {Path(f).name for f in files} diff --git a/tests/integration/test_cloud_open.py b/tests/integration/test_cloud_open.py index b69eba15..f71e36bb 100644 --- a/tests/integration/test_cloud_open.py +++ b/tests/integration/test_cloud_open.py @@ -1,8 +1,5 @@ -# package imports import logging -import os import random -import unittest import earthaccess import magic @@ -55,20 +52,6 @@ }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -82,11 +65,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] 
if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -95,14 +76,11 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_open_onprem_collection_granules(daac): +def test_earthaccess_can_open_onprem_collection_granules(mock_env, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -115,17 +93,18 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = 
granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") @@ -143,10 +122,11 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): f"download size(MB): {total_size_cmr}" ) + store = Store(Auth().login(strategy="environment")) # We are testing this method fileset = store.open(granules_to_open) - assertions.assertTrue(isinstance(fileset, list)) + assert isinstance(fileset, list) # we test that we can read some bytes and get the file type for file in fileset: @@ -163,4 +143,4 @@ def test_multi_file_granule(): urls = granules[0].data_links() assert len(urls) > 1 files = earthaccess.open(granules) - assert set(urls) == set(f.path for f in files) + assert set(urls) == {f.path for f in files} diff --git a/tests/integration/test_kerchunk.py b/tests/integration/test_kerchunk.py index 2e981cce..e92fffe2 100644 --- a/tests/integration/test_kerchunk.py +++ b/tests/integration/test_kerchunk.py @@ -1,6 +1,4 @@ import logging -import os -import unittest from pathlib import Path import earthaccess @@ -11,29 +9,20 @@ pytest.importorskip("dask") logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") @pytest.fixture(scope="module") def granules(): - granules = earthaccess.search_data( + return earthaccess.search_data( count=2, short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205", cloud_hosted=True, ) - return granules @pytest.mark.parametrize("protocol", ["", "file://"]) def test_consolidate_metadata_outfile(tmp_path, granules, protocol): outfile = f"{protocol}{tmp_path / 'metadata.json'}" - assert not Path(outfile).exists() result = earthaccess.consolidate_metadata( granules, 
outfile=outfile, @@ -44,7 +33,7 @@ def test_consolidate_metadata_outfile(tmp_path, granules, protocol): assert result == outfile -def test_consolidate_metadata_memory(tmp_path, granules): +def test_consolidate_metadata_memory(granules): result = earthaccess.consolidate_metadata( granules, access="indirect", @@ -61,10 +50,7 @@ def test_consolidate_metadata(tmp_path, granules, output): expected = xr.open_mfdataset(earthaccess.open(granules)) # Open with kerchunk consolidated metadata file - if output == "file": - kwargs = {"outfile": tmp_path / "metadata.json"} - else: - kwargs = {} + kwargs = {"outfile": tmp_path / "metadata.json"} if output == "file" else {} metadata = earthaccess.consolidate_metadata( granules, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs ) diff --git a/tests/integration/test_onprem_download.py b/tests/integration/test_onprem_download.py index 242a3c26..5c636e30 100644 --- a/tests/integration/test_onprem_download.py +++ b/tests/integration/test_onprem_download.py @@ -1,10 +1,6 @@ -# package imports import logging -import os import random import shutil -import unittest -from pathlib import Path import earthaccess import pytest @@ -38,30 +34,22 @@ "granules_sample_size": 2, "granules_max_size_mb": 100, }, - { - "short_name": "ORNLDAAC", - "collections_count": 100, - "collections_sample_size": 3, - "granules_count": 100, - "granules_sample_size": 2, - "granules_max_size_mb": 50, - }, + # + # ORNLDAAC no longer has any on-prem collections. This returns 0 collections: + # https://cmr.earthdata.nasa.gov/search/collections?data_center=ORNL_DAAC&cloud_hosted=false + # The following is commented out because the test in this file will now always fail + # because there are no longer any on-prem collections. 
+ # + # { + # "short_name": "ORNLDAAC", + # "collections_count": 100, + # "collections_sample_size": 3, + # "granules_count": 100, + # "granules_sample_size": 2, + # "granules_max_size_mb": 50, + # }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -75,11 +63,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -88,14 +74,11 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_download_onprem_collection_granules(daac): +def test_earthaccess_can_download_onprem_collection_granules(mock_env, tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -108,22 +91,22 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): hits = 
collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") continue - local_path = f"./tests/integration/data/{concept_id}" granules_to_download, total_size_cmr = get_sample_granules( granules, granules_sample_size, granules_max_size ) @@ -136,14 +119,15 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) + path = tmp_path / "tests" / "integration" / "data" / concept_id + path.mkdir(parents=True) + store = Store(Auth().login(strategy="environment")) # We are testing this method - downloaded_results = store.get(granules_to_download, local_path=local_path) + downloaded_results = store.get(granules_to_download, local_path=path) - assertions.assertTrue(isinstance(downloaded_results, list)) - assertions.assertTrue(len(downloaded_results) == granules_sample_size) + assert isinstance(downloaded_results, list) + assert 
len(downloaded_results) >= granules_sample_size - path = Path(local_path) - assertions.assertTrue(path.is_dir()) # test that we downloaded the mb reported by CMR total_mb_downloaded = round( (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2), 2 diff --git a/tests/integration/test_onprem_open.py b/tests/integration/test_onprem_open.py index 2a455c44..1135b7ba 100644 --- a/tests/integration/test_onprem_open.py +++ b/tests/integration/test_onprem_open.py @@ -1,8 +1,5 @@ -# package imports import logging -import os import random -import unittest import earthaccess import magic @@ -37,30 +34,22 @@ "granules_sample_size": 2, "granules_max_size_mb": 130, }, - { - "short_name": "ORNLDAAC", - "collections_count": 100, - "collections_sample_size": 2, - "granules_count": 100, - "granules_sample_size": 2, - "granules_max_size_mb": 50, - }, + # + # ORNLDAAC no longer has any on-prem collections. This returns 0 collections: + # https://cmr.earthdata.nasa.gov/search/collections?data_center=ORNL_DAAC&cloud_hosted=false + # The following is commented out because the test in this file will now always fail + # because there are no longer any on-prem collections. 
+ # + # { + # "short_name": "ORNLDAAC", + # "collections_count": 100, + # "collections_sample_size": 2, + # "granules_count": 100, + # "granules_sample_size": 2, + # "granules_max_size_mb": 50, + # }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -74,11 +63,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -87,14 +74,11 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_open_onprem_collection_granules(daac): +def test_earthaccess_can_open_onprem_collection_granules(mock_env, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -107,17 +91,18 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): hits = collection_query.hits() 
logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") @@ -135,10 +120,11 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): f"download size(MB): {total_size_cmr}" ) + store = Store(Auth().login(strategy="environment")) # We are testing this method fileset = store.open(granules_to_open) - assertions.assertTrue(isinstance(fileset, list)) + assert isinstance(fileset, list) # we test that we can read some bytes and get the file type for file in fileset: From 40aa52c894292cc9264a0feeda0b8cb885562c71 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Mon, 16 Sep 2024 20:12:22 -0400 Subject: [PATCH 2/2] Revert pull_request_target --- .github/workflows/integration-test.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 360f71ed..f78a9430 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -1,10 +1,7 @@ 
name: Integration Tests on: - pull_request_target: - types: - - opened - - synchronize + pull_request: push: branches: - main