diff --git a/.github/actions/install-pkg/action.yml b/.github/actions/install-pkg/action.yml index 5e8bd9e4..4b4e2d92 100644 --- a/.github/actions/install-pkg/action.yml +++ b/.github/actions/install-pkg/action.yml @@ -21,4 +21,4 @@ runs: - name: Install package and test dependencies shell: bash - run: pip install .[test] + run: pip install --root-user-action ignore ".[test]" diff --git a/.github/workflows/integration-test-review.yml b/.github/workflows/integration-test-review.yml new file mode 100644 index 00000000..b9cefb9c --- /dev/null +++ b/.github/workflows/integration-test-review.yml @@ -0,0 +1,52 @@ +name: Integration Tests PR Comment + +on: + workflow_run: + workflows: [Integration Tests] + types: [completed] + +jobs: + integration-tests-pr-comment: + runs-on: ubuntu-latest + if: > + github.event.workflow_run.event == 'pull_request_target' && + github.event.workflow_run.conclusion == 'failure' + steps: + - name: Fetch user permission + id: permission + uses: actions-cool/check-user-permission@v2 + with: + require: write + username: ${{ github.triggering_actor }} + + - name: Add PR comment when user does not have write permission + # The name of the output require-result is a bit confusing, but when its value + # is 'false', it means that the triggering actor does NOT have the required + # permission. + if: steps.permission.outputs.require-result == 'false' + + # If the triggering actor does not have write permission, then we want to add + # a PR comment indicating a security review is required because we know that + # the integration tests "failed" due to lack of permission (i.e., they were + # actually "aborted" without running any tests). 
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const { number, html_url } = (await github.rest.repos.listPullRequestsAssociatedWithCommit({
+              commit_sha: context.payload.workflow_run.head_sha,
+              owner: "${{ github.event.workflow_run.head_repository.owner.login }}",
+              repo: "${{ github.event.workflow_run.head_repository.name }}",
+            })).data[0] ?? {};
+
+            if (number) {
+              github.rest.issues.createComment({
+                issue_number: number,
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                body: "User [${{ github.triggering_actor }}](${{ github.event.workflow_run.head_repository.owner.html_url }})"
+                  + " does not have permission to run integration tests. A maintainer must perform a security review of the"
+                  + ` [code changes in this pull request](${html_url}/files) and re-run the`
+                  + " [failed integration tests jobs](${{ github.event.workflow_run.html_url }}),"
+                  + " if the code is deemed safe.",
+              });
+            }
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index fba60ccc..b868549d 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -47,6 +47,7 @@ jobs:
     steps:
       - name: Fetch user permission
+        if: github.event_name == 'pull_request_target'
         id: permission
         uses: actions-cool/check-user-permission@v2
         with:
@@ -54,7 +55,11 @@ jobs:
           username: ${{ github.triggering_actor }}
 
       - name: Check user permission
-        if: ${{ steps.permission.outputs.require-result == 'false' }}
+        # The name of the output require-result is a bit confusing, but when its value
+        # is 'false', it means that the triggering actor does NOT have the required
+        # permission.
+        if: github.event_name == 'pull_request_target' && steps.permission.outputs.require-result == 'false'
+
         # If the triggering actor does not have write permission (i.e., this is a
         # PR from a fork), then we exit, otherwise most of the integration tests will
         # fail because they require access to secrets. 
In this case, a maintainer @@ -78,8 +83,6 @@ jobs: env: EARTHDATA_USERNAME: ${{ secrets.EDL_USERNAME }} EARTHDATA_PASSWORD: ${{ secrets.EDL_PASSWORD }} - EARTHACCESS_TEST_USERNAME: ${{ secrets.EDL_USERNAME }} - EARTHACCESS_TEST_PASSWORD: ${{ secrets.EDL_PASSWORD }} run: ./scripts/integration-test.sh - name: Upload coverage report diff --git a/.github/workflows/test-mindeps.yml b/.github/workflows/test-mindeps.yml index 3be5bb01..050923f0 100644 --- a/.github/workflows/test-mindeps.yml +++ b/.github/workflows/test-mindeps.yml @@ -30,7 +30,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version-file: pyproject.toml + python-version: 3.9 - name: Install minimum-compatible dependencies run: uv sync --resolution lowest-direct --extra test diff --git a/.gitignore b/.gitignore index d0ee3ea9..f3567622 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ htmlcov dist site .coverage +.coverage.* coverage.xml .netlify test.db diff --git a/CHANGELOG.md b/CHANGELOG.md index eee38410..c13df1ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,8 +16,7 @@ ### Added -- Added Issue Templates - ([#281](https://github.com/nsidc/earthaccess/issues/281)) +- Add Issue Templates ([#281](https://github.com/nsidc/earthaccess/issues/281)) ([**@Sherwin-14**](https://github.com/Sherwin-14)) - Support Service queries ([#447](https://github.com/nsidc/earthaccess/issues/447)) @@ -25,20 +24,29 @@ [**@chuckwondo**](https://github.com/chuckwondo), [**@mfisher87**](https://github.com/mfisher87), [**@betolink**](https://github.com/betolink)) - -- Added example PR links to pull request template +- Support use of `NETRC` environment variable to override default `.netrc` file + location ([#480](https://github.com/nsidc/earthaccess/issues/480)) + ([**@chuckwondo**](https://github.com/chuckwondo)) +- Add example PR links to pull request template ([#756](https://github.com/nsidc/earthaccess/issues/756)) - [**@Sherwin-14**](https://github.com/Sherwin-14), - 
[**@mfisher87**](https://github.com/mfisher87) - -- Added Contributing Naming Convention document + ([**@Sherwin-14**](https://github.com/Sherwin-14), + [**@mfisher87**](https://github.com/mfisher87)) +- Add Contributing Naming Convention document ([#532](https://github.com/nsidc/earthaccess/issues/532)) - [**@Sherwin-14**](https://github.com/Sherwin-14), - [**@mfisher87**](https://github.com/mfisher87) + ([**@Sherwin-14**](https://github.com/Sherwin-14), + [**@mfisher87**](https://github.com/mfisher87)) + +### Removed + +- Remove `binder/` directory, as we no longer need a special + [binder](https://mybinder.org) environment with the top-level + `environment.yml` introduced in + [#733](https://github.com/nsidc/earthaccess/issues/733) + ([@jhkennedy](https://github.com/jhkennedy)) ### Fixed -- Removed Broken Link "Introduction to NASA earthaccess" +- Remove broken link "Introduction to NASA earthaccess" ([#779](https://github.com/nsidc/earthaccess/issues/779)) ([**@Sherwin-14**](https://github.com/Sherwin-14)) - Restore automation for tidying notebooks used in documentation @@ -47,13 +55,9 @@ - Remove the base class on `EarthAccessFile` to fix method resolution ([#610](https://github.com/nsidc/earthaccess/issues/610)) ([**@itcarroll**](https://github.com/itcarroll)) - -### Removed - -- Remove `binder/` directory, as we no longer need a special [binder](https://mybinder.org) - environment with the top-level `environment.yml` introduced in - [#733](https://github.com/nsidc/earthaccess/issues/733) - ([@jhkennedy](https://github.com/jhkennedy)) +- Integration tests no longer clobber existing `.netrc` file + ([#806](https://github.com/nsidc/earthaccess/issues/806)) + ([**@chuckwondo**](https://github.com/chuckwondo)) ## [0.10.0] 2024-07-19 diff --git a/docs/contributing/development.md b/docs/contributing/development.md index 1c4baa1a..5371a247 100644 --- a/docs/contributing/development.md +++ b/docs/contributing/development.md @@ -17,20 +17,29 @@ If you don't have pipx 
(pip for applications), then you can install with pip is reasonable). If you use macOS, then pipx and nox are both in brew, use `brew install pipx nox`. -To use, run `nox`. This will typecheck and test using every installed version of -Python on your system, skipping ones that are not installed. You can also run -specific jobs: +To use, run `nox` without any arguments. This will run type checks and unit +tests using the installed version of Python on your system. + +You can also run individual tasks (_sessions_ in `nox` parlance, hence the `-s` +option below), like so: ```console -$ nox -s typecheck # Typecheck only -$ nox -s tests # Python tests -$ nox -s build_docs -- --serve # Build and serve the docs -$ nox -s build_pkg # Make an SDist and wheel +nox -s typecheck # Run typechecks +nox -s tests # Run unit tests +nox -s integration-tests # Run integration tests (see note below) +nox -s build_docs -- --serve # Build and serve the docs +nox -s build_pkg # Build an SDist and wheel ``` Nox handles everything for you, including setting up a temporary virtual environment for each run. +**NOTE:** In order to run integration tests locally, you must set the +environment variables `EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD` to your +username and password, respectively, of your +[NASA Earthdata](https://urs.earthdata.nasa.gov/) account (registration is +free). 
+ ## Manual development environment setup While `nox` is the fastest way to get started, you will likely need a full diff --git a/docs/howto/authenticate.md b/docs/howto/authenticate.md index 407e5140..ec94f5ea 100644 --- a/docs/howto/authenticate.md +++ b/docs/howto/authenticate.md @@ -1,10 +1,14 @@ -## Authenticate with Earthdata Login +# Authenticate with Earthdata Login -The first step to use NASA Earthdata is to create an account with Earthdata Login, please follow the instructions at [NASA EDL](https://urs.earthdata.nasa.gov/) +The first step to use NASA Earthdata is to create an account with Earthdata +Login, please follow the instructions at +[NASA EDL](https://urs.earthdata.nasa.gov/) -Once registered, earthaccess can use environment variables, a `.netrc` file or interactive input from a user to login with NASA EDL. +Once registered, earthaccess can use environment variables, a `.netrc` file or +interactive input from a user to login with NASA EDL. -If a strategy is not especified, env vars will be used first, then netrc and finally user's input. +If a strategy is not specified, environment variables will be used first, then +a `.netrc` (if found, see below), and finally a user's input. 
```py import earthaccess @@ -12,35 +16,48 @@ import earthaccess auth = earthaccess.login() ``` -If you have a .netrc file with your Earthdata Login credentials +If you have a `.netrc` file (see below) with your Earthdata Login credentials, +you can explicitly specify its use: ```py auth = earthaccess.login(strategy="netrc") ``` -If your Earthdata Login credentials are set as environment variables: EARTHDATA_USERNAME, EARTHDATA_PASSWORD +If your Earthdata Login credentials are set as the environment variables +`EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD`, you can explicitly specify their +use: ```py auth = earthaccess.login(strategy="environment") ``` -If you wish to enter your Earthdata Login credentials when prompted with optional persistence to .netrc +If you wish to enter your Earthdata Login credentials when prompted, with +optional persistence to your `.netrc` file (see below), specify the interactive +strategy: ```py auth = earthaccess.login(strategy="interactive", persist=True) ``` +## Authentication +By default, `earthaccess` with automatically look for your EDL account +credentials in two locations: -### **Authentication** +1. A `.netrc` file: By default, this is either `~/_netrc` (on a Windows system) + or `~/.netrc` (on a non-Windows system). On *any* system, you may override + the default location by setting the `NETRC` environment variable to the path + of your desired `.netrc` file. -By default, `earthaccess` with automatically look for your EDL account credentials in two locations: - -1. A `~/.netrc` file + **NOTE**: When setting the `NETRC` environment variable, there is no + requirement to use a specific filename. The name `.netrc` is common, but + used throughout documentation primarily for convenience. The only + requirement is that the *contents* of the file adhere to the + [`.netrc` file format](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html). 2. 
`EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD` environment variables -If neither of these options are configured, you can authenticate by calling the `earthaccess.login()` method -and manually entering your EDL account credentials. +If neither of these options are configured, you can authenticate by calling the +`earthaccess.login()` method and manually entering your EDL account credentials. ```python import earthaccess @@ -48,27 +65,26 @@ import earthaccess earthaccess.login() ``` -Note you can pass `persist=True` to `earthaccess.login()` to have the EDL account credentials you enter -automatically saved to a `~/.netrc` file for future use. - +Note you can pass `persist=True` to `earthaccess.login()` to have the EDL +account credentials you enter automatically saved to your `.netrc` file (see +above) for future use. Once you are authenticated with NASA EDL you can: * Get a file from a DAAC using a `fsspec` session. -* Request temporary S3 credentials from a particular DAAC (needed to download or stream data from an S3 bucket in the cloud). +* Request temporary S3 credentials from a particular DAAC (needed to download or + stream data from an S3 bucket in the cloud). * Use the library to download or stream data directly from S3. * Regenerate CMR tokens (used for restricted datasets). 
+## Earthdata User Acceptance Testing (UAT) environment -### Earthdata User Acceptance Testing (UAT) environment - -If your EDL account is authorized to access the User Acceptance Testing (UAT) system, -you can set earthaccess to work with its EDL and CMR endpoints -by setting the `system` argument at login, as follows: +If your EDL account is authorized to access the User Acceptance Testing (UAT) +system, you can set earthaccess to work with its EDL and CMR endpoints by +setting the `system` argument at login, as follows: ```python import earthaccess earthaccess.login(system=earthaccess.UAT) - ``` diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index 6d7d0def..73f7ed2d 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -1,6 +1,7 @@ import logging import threading from importlib.metadata import version +from typing import Optional from .api import ( auth_environ, @@ -21,7 +22,7 @@ ) from .auth import Auth from .kerchunk import consolidate_metadata -from .search import DataCollections, DataGranules +from .search import DataCollection, DataCollections, DataGranule, DataGranules from .services import DataServices from .store import Store from .system import PROD, UAT @@ -46,7 +47,9 @@ "download", "auth_environ", # search.py + "DataGranule", "DataGranules", + "DataCollection", "DataCollections", "DataServices", # auth.py @@ -62,7 +65,7 @@ __version__ = version("earthaccess") _auth = Auth() -_store = None +_store: Optional[Store] = None _lock = threading.Lock() diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 4332379a..3a3b209c 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -25,6 +25,24 @@ logger = logging.getLogger(__name__) +def netrc_path() -> Path: + """Return the path of the `.netrc` file. + + The path may or may not exist. + + See [the `.netrc` file](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html). 
+ + Returns: + `Path` of the `NETRC` environment variable, if the value is non-empty; + otherwise, the path of the platform-specific default location: + `~/_netrc` on Windows systems, `~/.netrc` on non-Windows systems. + """ + sys_netrc_name = "_netrc" if platform.system() == "Windows" else ".netrc" + env_netrc = os.environ.get("NETRC") + + return Path(env_netrc) if env_netrc else Path.home() / sys_netrc_name + + class SessionWithHeaderRedirection(requests.Session): """Requests removes auth headers if the redirect happens outside the original req domain. @@ -104,11 +122,12 @@ def login( if self.authenticated and (system == self.system): logger.debug("We are already authenticated with NASA EDL") return self + if strategy == "interactive": self._interactive(persist) - if strategy == "netrc": + elif strategy == "netrc": self._netrc() - if strategy == "environment": + elif strategy == "environment": self._environment() return self @@ -222,25 +241,29 @@ def _interactive(self, persist_credentials: bool = False) -> bool: if authenticated: logger.debug("Using user provided credentials for EDL") if persist_credentials: - logger.info("Persisting credentials to .netrc") self._persist_user_credentials(username, password) return authenticated def _netrc(self) -> bool: + netrc_loc = netrc_path() + try: - my_netrc = Netrc() + my_netrc = Netrc(str(netrc_loc)) except FileNotFoundError as err: - raise FileNotFoundError(f"No .netrc found in {Path.home()}") from err + raise FileNotFoundError(f"No .netrc found at {netrc_loc}") from err except NetrcParseError as err: - raise NetrcParseError("Unable to parse .netrc") from err + raise NetrcParseError(f"Unable to parse .netrc file {netrc_loc}") from err + if (creds := my_netrc[self.system.edl_hostname]) is None: return False username = creds["login"] password = creds["password"] authenticated = self._get_credentials(username, password) + if authenticated: logger.debug("Using .netrc file for EDL") + return authenticated def 
_environment(self) -> bool: @@ -293,33 +316,41 @@ def _find_or_create_token(self, username: str, password: str) -> Any: def _persist_user_credentials(self, username: str, password: str) -> bool: # See: https://github.com/sloria/tinynetrc/issues/34 + + netrc_loc = netrc_path() + logger.info(f"Persisting credentials to {netrc_loc}") + try: - netrc_path = Path().home().joinpath(".netrc") - netrc_path.touch(exist_ok=True) - netrc_path.chmod(0o600) + netrc_loc.touch(exist_ok=True) + netrc_loc.chmod(0o600) except Exception as e: logger.error(e) return False - my_netrc = Netrc(str(netrc_path)) + + my_netrc = Netrc(str(netrc_loc)) my_netrc[self.system.edl_hostname] = { "login": username, "password": password, } my_netrc.save() + urs_cookies_path = Path.home() / ".urs_cookies" + if not urs_cookies_path.exists(): urs_cookies_path.write_text("") # Create and write to .dodsrc file dodsrc_path = Path.home() / ".dodsrc" + if not dodsrc_path.exists(): dodsrc_contents = ( - f"HTTP.COOKIEJAR={urs_cookies_path}\nHTTP.NETRC={netrc_path}" + f"HTTP.COOKIEJAR={urs_cookies_path}\nHTTP.NETRC={netrc_loc}" ) dodsrc_path.write_text(dodsrc_contents) if platform.system() == "Windows": local_dodsrc_path = Path.cwd() / dodsrc_path.name + if not local_dodsrc_path.exists(): shutil.copy2(dodsrc_path, local_dodsrc_path) diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py index 26758184..9ee40dec 100644 --- a/earthaccess/kerchunk.py +++ b/earthaccess/kerchunk.py @@ -1,34 +1,39 @@ from __future__ import annotations +from typing import Optional, Union + import fsspec +import fsspec.utils import s3fs import earthaccess def _get_chunk_metadata( - granule: earthaccess.results.DataGranule, - fs: fsspec.AbstractFileSystem | s3fs.S3FileSystem, + granule: earthaccess.DataGranule, + fs: fsspec.AbstractFileSystem, ) -> list[dict]: from kerchunk.hdf import SingleHdf5ToZarr metadata = [] access = "direct" if isinstance(fs, s3fs.S3FileSystem) else "indirect" + for url in 
granule.data_links(access=access): with fs.open(url) as inf: h5chunks = SingleHdf5ToZarr(inf, url) m = h5chunks.translate() metadata.append(m) + return metadata def consolidate_metadata( - granules: list[earthaccess.results.DataGranule], - kerchunk_options: dict | None = None, + granules: list[earthaccess.DataGranule], + kerchunk_options: Optional[dict] = None, access: str = "direct", - outfile: str | None = None, - storage_options: dict | None = None, -) -> str | dict: + outfile: Optional[str] = None, + storage_options: Optional[dict] = None, +) -> Union[str, dict]: try: import dask @@ -44,15 +49,16 @@ def consolidate_metadata( fs = earthaccess.get_fsspec_https_session() # Get metadata for each granule - get_chunk_metadata = dask.delayed(_get_chunk_metadata) - chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) + get_chunk_metadata = dask.delayed(_get_chunk_metadata) # type: ignore + chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) # type: ignore chunks = sum(chunks, start=[]) # Get combined metadata object mzz = MultiZarrToZarr(chunks, **(kerchunk_options or {})) - if outfile is not None: - output = fsspec.utils.stringify_path(outfile) - mzz.translate(outfile, storage_options=storage_options or {}) - return output - else: + + if outfile is None: return mzz.translate() + + output = fsspec.utils.stringify_path(outfile) + mzz.translate(outfile, storage_options=storage_options or {}) + return output diff --git a/earthaccess/store.py b/earthaccess/store.py index 61437542..817f3218 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -63,11 +63,11 @@ def __repr__(self) -> str: def _open_files( url_mapping: Mapping[str, Union[DataGranule, None]], fs: fsspec.AbstractFileSystem, - threads: Optional[int] = 8, + threads: int = 8, ) -> List[EarthAccessFile]: def multi_thread_open(data: tuple) -> EarthAccessFile: - urls, granule = data - return EarthAccessFile(fs.open(urls), granule) + url, granule = data + return 
EarthAccessFile(fs.open(url), granule) # type: ignore fileset = pqdm(url_mapping.items(), multi_thread_open, n_jobs=threads) return fileset @@ -365,7 +365,7 @@ def _open_granules( self, granules: List[DataGranule], provider: Optional[str] = None, - threads: Optional[int] = 8, + threads: int = 8, ) -> List[Any]: fileset: List = [] total_size = round(sum([granule.size() for granule in granules]) / 1024, 2) @@ -419,7 +419,7 @@ def _open_urls( self, granules: List[str], provider: Optional[str] = None, - threads: Optional[int] = 8, + threads: int = 8, ) -> List[Any]: fileset: List = [] @@ -685,14 +685,14 @@ def _download_onprem_granules( def _open_urls_https( self, url_mapping: Mapping[str, Union[DataGranule, None]], - threads: Optional[int] = 8, + threads: int = 8, ) -> List[fsspec.AbstractFileSystem]: https_fs = self.get_fsspec_session() - if https_fs is not None: - try: - fileset = _open_files(url_mapping, https_fs, threads) - except Exception: - logger.exception( - "An exception occurred while trying to access remote files via HTTPS" - ) - return fileset + + try: + return _open_files(url_mapping, https_fs, threads) + except Exception: + logger.exception( + "An exception occurred while trying to access remote files via HTTPS" + ) + raise diff --git a/noxfile.py b/noxfile.py index 257b0533..a62314f6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import shutil from pathlib import Path @@ -26,6 +27,21 @@ def tests(session: nox.Session) -> None: session.run("pytest", "tests/unit", *session.posargs) +@nox.session(name="integration-tests") +def integration_tests(session: nox.Session) -> None: + """Run the integration tests.""" + session.install("--editable", ".[test]") + session.run( + "scripts/integration-test.sh", + *session.posargs, + env=dict( + EARTHDATA_USERNAME=os.environ["EARTHDATA_USERNAME"], + EARTHDATA_PASSWORD=os.environ["EARTHDATA_PASSWORD"], + ), + external=True, + ) + + @nox.session def 
build_pkg(session: nox.Session) -> None: """Build a source distribution and binary distribution (wheel).""" diff --git a/pyproject.toml b/pyproject.toml index d8ef87dd..ab024efa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,14 +5,12 @@ build-backend = "hatchling.build" [project] name = "earthaccess" version = "0.10.0" -repository = "https://github.com/nsidc/earthaccess" -documentation = "https://earthaccess.readthedocs.io" description = "Client library for NASA Earthdata APIs" authors = [ {name = "earthaccess contributors"} ] maintainers = [ - {name = "Luis Lopez", email = "betolin@gmail.com"}, + {name = "Luis Lopez", email = "betolink@gmail.com"}, {name = "Joseph H. Kennedy", email = "jhkennedy@alaska.edu"}, {name = "James Bourbeau", email = "james@coiled.io"}, {name = "Matt Fisher", email = "mfisher87@gmail.com"}, @@ -49,16 +47,24 @@ dependencies = [ "multimethod >=1.8", "importlib-resources >=6.3.2", "typing_extensions >=4.10.0", - # kerchunk requires numpy, but numpy >=1.26.0 is required for Python 3.12 - # support - "numpy >=1.24.0; python_version < '3.12'", - "numpy >=1.26.0; python_version >= '3.12'", ] +[project.urls] +Repository = "https://github.com/nsidc/earthaccess" +Documentation = "https://earthaccess.readthedocs.io/en/latest/" +"Bug Tracker" = "https://github.com/nsidc/earthaccess/issues" +Changelog = "https://github.com/nsidc/earthaccess/blob/main/CHANGELOG.md" + [project.optional-dependencies] kerchunk = [ "kerchunk", "dask", + "h5py >=3.0", + "h5netcdf", + "xarray", + # kerchunk requires numpy, but numpy >=1.26.0 is required for Python 3.12 + "numpy >=1.24.0; python_version < '3.12'", + "numpy >=1.26.0; python_version >= '3.12'", ] dev = [ "bump-my-version >=0.10.0", @@ -77,6 +83,7 @@ test = [ "types-requests >=0.1", "types-setuptools >=0.1", "vcrpy >=6.0.1", + "earthaccess[kerchunk]", ] docs = [ "jupyterlab >=3", diff --git a/scripts/integration-test.sh b/scripts/integration-test.sh index 15b173f8..506976ad 100755 --- 
a/scripts/integration-test.sh +++ b/scripts/integration-test.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -x -pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing ${@} --capture=no --tb=native --log-cli-level=INFO +pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing "${@}" --capture=no --tb=native --log-cli-level=INFO RET=$? set +x diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c2d4a3c2..8c206885 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,3 +1,7 @@ +import os +import pathlib + +import earthaccess import pytest ACCEPTABLE_FAILURE_RATE = 10 @@ -29,3 +33,31 @@ def pytest_sessionfinish(session, exitstatus): failure_rate = (100.0 * session.testsfailed) / session.testscollected if failure_rate <= ACCEPTABLE_FAILURE_RATE: session.exitstatus = 99 + + +@pytest.fixture +def mock_missing_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + netrc_path = tmp_path / ".netrc" + monkeypatch.setenv("NETRC", str(netrc_path)) + monkeypatch.delenv("EARTHDATA_USERNAME") + monkeypatch.delenv("EARTHDATA_PASSWORD") + # Currently, due to there being only a single, global, module-level auth + # value, tests using different auth strategies interfere with each other, + # so here we are monkeypatching a new, unauthenticated Auth object. 
+ auth = earthaccess.Auth() + monkeypatch.setattr(earthaccess, "_auth", auth) + monkeypatch.setattr(earthaccess, "__auth__", auth) + + +@pytest.fixture +def mock_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + netrc = tmp_path / ".netrc" + monkeypatch.setenv("NETRC", str(netrc)) + + username = os.environ["EARTHDATA_USERNAME"] + password = os.environ["EARTHDATA_PASSWORD"] + + netrc.write_text( + f"machine urs.earthdata.nasa.gov login {username} password {password}\n" + ) + netrc.chmod(0o600) diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index 8fd45489..f0fdd219 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -1,21 +1,11 @@ -# package imports import logging import os -import unittest from pathlib import Path import earthaccess import pytest logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") dataset_valid_params = [ @@ -44,29 +34,29 @@ def test_auth_returns_valid_auth_class(): auth = earthaccess.login(strategy="environment") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated def test_dataset_search_returns_none_with_no_parameters(): results = earthaccess.search_datasets() - assertions.assertIsInstance(results, list) - assertions.assertTrue(len(results) == 0) + assert isinstance(results, list) + assert len(results) == 0 @pytest.mark.parametrize("kwargs", dataset_valid_params) def test_dataset_search_returns_valid_results(kwargs): 
results = earthaccess.search_datasets(**kwargs) - assertions.assertIsInstance(results, list) - assertions.assertIsInstance(results[0], dict) + assert isinstance(results, list) + assert isinstance(results[0], dict) @pytest.mark.parametrize("kwargs", granules_valid_params) def test_granules_search_returns_valid_results(kwargs): results = earthaccess.search_data(count=10, **kwargs) - assertions.assertIsInstance(results, list) - assertions.assertTrue(len(results) <= 10) + assert isinstance(results, list) + assert len(results) <= 10 @pytest.mark.parametrize("selection", [0, slice(None)]) @@ -80,15 +70,15 @@ def test_download(tmp_path, selection, use_url): ) if use_url: # Download via file URL string instead of DataGranule object - results = [r.data_links(access="indirect") for r in results] - results = sum(results, start=[]) # flatten to a list of strings + results = [link for r in results for link in r.data_links(access="indirect")] result = results[selection] files = earthaccess.download(result, str(tmp_path)) - assertions.assertIsInstance(files, list) + assert isinstance(files, list) assert all(Path(f).exists() for f in files) def test_auth_environ(): + earthaccess.login(strategy="environment") environ = earthaccess.auth_environ() assert environ == { "EARTHDATA_USERNAME": os.environ["EARTHDATA_USERNAME"], diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index 7c0c1b37..1a83833d 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -1,90 +1,54 @@ -# package imports import logging -import os -import pathlib -import unittest import earthaccess +import earthaccess.daac import pytest import requests import s3fs logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - -NETRC_PATH = pathlib.Path.home() / pathlib.Path(".netrc") - - -def activate_environment(): - earthaccess.__auth__ = earthaccess.Auth() - # the original comes from github secrets - os.environ["EARTHDATA_USERNAME"] = 
os.getenv("EARTHACCESS_TEST_USERNAME", "") - os.environ["EARTHDATA_PASSWORD"] = os.getenv("EARTHACCESS_TEST_PASSWORD", "") - - -def activate_netrc(): - activate_environment() - username = os.environ["EARTHDATA_USERNAME"] - password = os.environ["EARTHDATA_PASSWORD"] - - with open(NETRC_PATH, "w") as f: - f.write( - f"machine urs.earthdata.nasa.gov login {username} password {password}\n" - ) - NETRC_PATH.chmod(0o600) - - -def delete_netrc(): - if NETRC_PATH.exists(): - NETRC_PATH.unlink() def test_auth_can_read_earthdata_env_variables(): - activate_environment() auth = earthaccess.login(strategy="environment") logger.info(f"Current username: {auth.username}") logger.info(f"earthaccess version: {earthaccess.__version__}") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated -def test_auth_can_read_from_netrc_file(): - activate_netrc() +def test_auth_can_read_from_netrc_file(mock_netrc): auth = earthaccess.login(strategy="netrc") - assertions.assertTrue(auth.authenticated) - delete_netrc() + assert auth.authenticated -def test_auth_throws_exception_if_netrc_is_not_present(): - activate_environment() - delete_netrc() - with pytest.raises(Exception): +def test_auth_throws_exception_if_netrc_is_not_present(mock_missing_netrc): + with pytest.raises(FileNotFoundError): earthaccess.login(strategy="netrc") - assertions.assertRaises(FileNotFoundError) def test_auth_populates_attrs(): - activate_environment() auth = earthaccess.login(strategy="environment") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert 
isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated def test_auth_can_create_authenticated_requests_sessions(): - activate_environment() session = earthaccess.get_requests_https_session() - assertions.assertTrue("Authorization" in session.headers) - assertions.assertTrue("Bearer" in session.headers["Authorization"]) + assert "Authorization" in session.headers + assert "Bearer" in session.headers["Authorization"] # type: ignore -@pytest.mark.parametrize("daac", earthaccess.daac.DAACS) +@pytest.mark.parametrize( + "daac", [daac for daac in earthaccess.daac.DAACS if daac["s3-credentials"]] +) def test_auth_can_fetch_s3_credentials(daac): - activate_environment() auth = earthaccess.login(strategy="environment") assert auth.authenticated + try: credentials = earthaccess.get_s3_credentials(daac["short-name"]) except requests.RequestException as e: @@ -96,9 +60,9 @@ def test_auth_can_fetch_s3_credentials(daac): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) def test_get_s3_credentials_lowercase_location(location): - activate_environment() earthaccess.login(strategy="environment") creds = earthaccess.get_s3_credentials(**location) + assert creds assert all( creds[key] @@ -108,8 +72,8 @@ def test_get_s3_credentials_lowercase_location(location): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) def test_get_s3_filesystem_lowercase_location(location): - activate_environment() earthaccess.login(strategy="environment") fs = earthaccess.get_s3_filesystem(**location) + assert isinstance(fs, s3fs.S3FileSystem) assert all(fs.storage_options[key] for key in ["key", "secret", "token"]) diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py index 4e8f9519..11fab5a1 100644 --- a/tests/integration/test_cloud_download.py +++ b/tests/integration/test_cloud_download.py @@ -1,9 +1,6 @@ -# package imports import logging -import os import 
random import shutil -import unittest from pathlib import Path import earthaccess @@ -56,20 +53,6 @@ }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -83,11 +66,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -96,7 +77,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): @pytest.mark.parametrize("daac", daac_list) -def test_earthaccess_can_download_cloud_collection_granules(daac): +def test_earthaccess_can_download_cloud_collection_granules(tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -109,17 +90,17 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = 
random.sample(collections, collections_sample_size) + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) assert isinstance(granules, list) and len(granules) > 0 - assert isinstance(granules[0], earthaccess.results.DataGranule) - local_path = f"./tests/integration/data/{concept_id}" + assert isinstance(granules[0], earthaccess.DataGranule) granules_to_download, total_size_cmr = get_sample_granules( granules, granules_sample_size, granules_max_size ) @@ -132,14 +113,16 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) - # We are testing this method + path = tmp_path / "tests" / "integration" / "data" / concept_id + path.mkdir(parents=True) + store = Store(Auth().login(strategy="environment")) + try: - store.get(granules_to_download, local_path=local_path) - except Exception: - logger.warning(Exception) + # We are testing this method + store.get(granules_to_download, local_path=path) + except Exception as e: + logger.warning(e) - path = Path(local_path) - assert path.is_dir() # test that we downloaded the mb reported by CMR total_mb_downloaded = round( (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2) @@ -163,4 +146,4 @@ def test_multi_file_granule(tmp_path): urls = granules[0].data_links() assert len(urls) > 1 files = earthaccess.download(granules, str(tmp_path)) - assert set([Path(f).name for f in urls]) == set([Path(f).name for f in files]) + assert {Path(f).name for f in urls} == {Path(f).name for f in files} diff --git a/tests/integration/test_cloud_open.py b/tests/integration/test_cloud_open.py index b69eba15..a0ca5501 100644 --- a/tests/integration/test_cloud_open.py +++ b/tests/integration/test_cloud_open.py @@ -1,8 +1,5 @@ -# package imports import logging 
-import os import random -import unittest import earthaccess import magic @@ -55,20 +52,6 @@ }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -82,11 +65,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -95,10 +76,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) @@ -115,17 +93,18 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled 
{len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") @@ -143,10 +122,11 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): f"download size(MB): {total_size_cmr}" ) + store = Store(Auth().login(strategy="environment")) # We are testing this method fileset = store.open(granules_to_open) - assertions.assertTrue(isinstance(fileset, list)) + assert isinstance(fileset, list) # we test that we can read some bytes and get the file type for file in fileset: @@ -163,4 +143,4 @@ def test_multi_file_granule(): urls = granules[0].data_links() assert len(urls) > 1 files = earthaccess.open(granules) - assert set(urls) == set(f.path for f in files) + assert set(urls) == {f.path for f in files} diff --git a/tests/integration/test_kerchunk.py b/tests/integration/test_kerchunk.py index 2e981cce..90a71d19 100644 --- a/tests/integration/test_kerchunk.py +++ b/tests/integration/test_kerchunk.py @@ -1,39 +1,25 @@ import logging -import os -import unittest from pathlib import Path import earthaccess import pytest from fsspec.core import strip_protocol -kerchunk = pytest.importorskip("kerchunk") -pytest.importorskip("dask") - logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: 
{os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") @pytest.fixture(scope="module") def granules(): - granules = earthaccess.search_data( + return earthaccess.search_data( count=2, short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205", cloud_hosted=True, ) - return granules @pytest.mark.parametrize("protocol", ["", "file://"]) def test_consolidate_metadata_outfile(tmp_path, granules, protocol): outfile = f"{protocol}{tmp_path / 'metadata.json'}" - assert not Path(outfile).exists() result = earthaccess.consolidate_metadata( granules, outfile=outfile, @@ -44,7 +30,7 @@ def test_consolidate_metadata_outfile(tmp_path, granules, protocol): assert result == outfile -def test_consolidate_metadata_memory(tmp_path, granules): +def test_consolidate_metadata_memory(granules): result = earthaccess.consolidate_metadata( granules, access="indirect", @@ -56,15 +42,17 @@ def test_consolidate_metadata_memory(tmp_path, granules): @pytest.mark.parametrize("output", ["file", "memory"]) def test_consolidate_metadata(tmp_path, granules, output): - xr = pytest.importorskip("xarray") + # We import here because xarray is installed only when the kerchunk extra is + # installed, and when type checking is run, kerchunk (and thus xarray) is + # not installed, so mypy barfs when this is a top-level import. Further, + # mypy complains even when imported here, but here we can mark it to ignore. 
+ import xarray as xr # type: ignore + # Open directly with `earthaccess.open` - expected = xr.open_mfdataset(earthaccess.open(granules)) + expected = xr.open_mfdataset(earthaccess.open(granules), engine="h5netcdf") # Open with kerchunk consolidated metadata file - if output == "file": - kwargs = {"outfile": tmp_path / "metadata.json"} - else: - kwargs = {} + kwargs = {"outfile": tmp_path / "metadata.json"} if output == "file" else {} metadata = earthaccess.consolidate_metadata( granules, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs ) diff --git a/tests/integration/test_onprem_download.py b/tests/integration/test_onprem_download.py index 242a3c26..d728be6c 100644 --- a/tests/integration/test_onprem_download.py +++ b/tests/integration/test_onprem_download.py @@ -1,10 +1,6 @@ -# package imports import logging -import os import random import shutil -import unittest -from pathlib import Path import earthaccess import pytest @@ -38,30 +34,8 @@ "granules_sample_size": 2, "granules_max_size_mb": 100, }, - { - "short_name": "ORNLDAAC", - "collections_count": 100, - "collections_sample_size": 3, - "granules_count": 100, - "granules_sample_size": 2, - "granules_max_size_mb": 50, - }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -75,11 +49,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: 
{g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -88,14 +60,11 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_download_onprem_collection_granules(daac): +def test_earthaccess_can_download_onprem_collection_granules(tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -108,22 +77,22 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not 
supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") continue - local_path = f"./tests/integration/data/{concept_id}" granules_to_download, total_size_cmr = get_sample_granules( granules, granules_sample_size, granules_max_size ) @@ -136,14 +105,15 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) + path = tmp_path / "tests" / "integration" / "data" / concept_id + path.mkdir(parents=True) + store = Store(Auth().login(strategy="environment")) # We are testing this method - downloaded_results = store.get(granules_to_download, local_path=local_path) + downloaded_results = store.get(granules_to_download, local_path=path) - assertions.assertTrue(isinstance(downloaded_results, list)) - assertions.assertTrue(len(downloaded_results) == granules_sample_size) + assert isinstance(downloaded_results, list) + assert len(downloaded_results) >= granules_sample_size - path = Path(local_path) - assertions.assertTrue(path.is_dir()) # test that we downloaded the mb reported by CMR total_mb_downloaded = round( (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2), 2 diff --git a/tests/integration/test_onprem_open.py b/tests/integration/test_onprem_open.py index 2a455c44..dbc41994 100644 --- a/tests/integration/test_onprem_open.py +++ b/tests/integration/test_onprem_open.py @@ -1,8 +1,5 @@ -# package imports import logging -import os import random -import unittest import earthaccess import magic @@ -37,30 +34,8 @@ "granules_sample_size": 2, "granules_max_size_mb": 130, }, - { - "short_name": "ORNLDAAC", - "collections_count": 100, - "collections_sample_size": 2, - "granules_count": 100, - "granules_sample_size": 2, - "granules_max_size_mb": 50, - }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in 
os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -74,11 +49,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -87,10 +60,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) @@ -107,17 +77,18 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) 
- assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") @@ -135,10 +106,11 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): f"download size(MB): {total_size_cmr}" ) + store = Store(Auth().login(strategy="environment")) # We are testing this method fileset = store.open(granules_to_open) - assertions.assertTrue(isinstance(fileset, list)) + assert isinstance(fileset, list) # we test that we can read some bytes and get the file type for file in fileset: diff --git a/tests/integration/test_services.py b/tests/integration/test_services.py index 04de0dce..2d1eb34c 100644 --- a/tests/integration/test_services.py +++ b/tests/integration/test_services.py @@ -1,21 +1,6 @@ -# package imports -import logging -import os -import unittest - import earthaccess -from earthaccess.api import search_datasets from vcr.unittest import VCRTestCase # type: ignore[import-untyped] -assertions = unittest.TestCase("__init__") -logger = logging.getLogger(__name__) - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - class TestServices(VCRTestCase): def scrub_access_token(self, string, replacement=""): @@ -41,7 +26,7 @@ def _get_vcr(self, **kwargs): def test_services(self): """Test that a list of services can be retrieved.""" - datasets = search_datasets( + datasets = earthaccess.search_datasets( short_name="MUR-JPL-L4-GLOB-v4.1", cloud_hosted=True, temporal=("2024-02-27T00:00:00Z", "2024-02-29T00:00:00Z"), diff --git 
a/uv.lock b/uv.lock index 7d3b8fa6..f7826706 100644 --- a/uv.lock +++ b/uv.lock @@ -861,7 +861,6 @@ dependencies = [ { name = "fsspec" }, { name = "importlib-resources" }, { name = "multimethod" }, - { name = "numpy" }, { name = "pqdm" }, { name = "python-cmr" }, { name = "requests" }, @@ -901,10 +900,19 @@ docs = [ ] kerchunk = [ { name = "dask" }, + { name = "h5netcdf" }, + { name = "h5py" }, { name = "kerchunk" }, + { name = "numpy" }, + { name = "xarray" }, ] test = [ + { name = "dask" }, + { name = "h5netcdf" }, + { name = "h5py" }, + { name = "kerchunk" }, { name = "mypy" }, + { name = "numpy" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-watch" }, @@ -914,6 +922,7 @@ test = [ { name = "types-requests", version = "2.32.0.20240907", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' and python_full_version < '3.12' and platform_python_implementation != 'PyPy'" }, { name = "types-setuptools" }, { name = "vcrpy" }, + { name = "xarray" }, ] [package.metadata] @@ -922,8 +931,11 @@ requires-dist = [ { name = "cftime", marker = "extra == 'docs'", specifier = ">=1.6.4" }, { name = "dask", marker = "extra == 'docs'", specifier = ">=2024.8.0" }, { name = "dask", marker = "extra == 'kerchunk'" }, + { name = "earthaccess", extras = ["kerchunk"], marker = "extra == 'test'" }, { name = "fsspec", specifier = ">=2022.11" }, { name = "h5netcdf", marker = "extra == 'docs'", specifier = ">=0.11" }, + { name = "h5netcdf", marker = "extra == 'kerchunk'" }, + { name = "h5py", marker = "extra == 'kerchunk'", specifier = ">=3.0" }, { name = "importlib-resources", specifier = ">=6.3.2" }, { name = "ipywidgets", marker = "extra == 'docs'", specifier = ">=7.7.0" }, { name = "jupyterlab", marker = "extra == 'docs'", specifier = ">=3" }, @@ -940,8 +952,8 @@ requires-dist = [ { name = "multimethod", specifier = ">=1.8" }, { name = "mypy", marker = "extra == 'test'", specifier = ">=1.11.2" }, { name = "nox", marker = "extra == 
'dev'" }, - { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.24.0" }, - { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, + { name = "numpy", marker = "python_full_version >= '3.12' and extra == 'kerchunk'", specifier = ">=1.26.0" }, + { name = "numpy", marker = "python_full_version < '3.12' and extra == 'kerchunk'", specifier = ">=1.24.0" }, { name = "pqdm", specifier = ">=0.1" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=2.4" }, { name = "pygments", marker = "extra == 'docs'", specifier = ">=2.11.1" }, @@ -965,6 +977,7 @@ requires-dist = [ { name = "vcrpy", marker = "extra == 'test'", specifier = ">=6.0.1" }, { name = "widgetsnbextension", marker = "extra == 'docs'", specifier = ">=3.6.0" }, { name = "xarray", marker = "extra == 'docs'", specifier = ">=2023.1" }, + { name = "xarray", marker = "extra == 'kerchunk'" }, ] [[package]]