From 84180b00a1fde6e03491952bd81cbb68f9819100 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Mon, 8 Apr 2024 11:15:30 -0400 Subject: [PATCH 01/12] Fix CMR-related type hints There were a number of type hints in `search.py` and `api.py` related to CMR queries that were incorrect. These were fixed. In addition, there were a number of other static type errors that were masked because of ignored `cmr` imports. Added type stubs for `python_cmr` library to unmask and address these additional type errors. Limited static type changes as much as possible to only functions and methods dealing with CMR queries and results to keep this PR manageable. Fixes #508 --- .gitignore | 76 ++++++- CHANGELOG.md | 4 + earthaccess/api.py | 23 +-- earthaccess/search.py | 442 ++++++++++++++++++++++++++++++---------- earthaccess/typing_.py | 49 +++++ pyproject.toml | 28 ++- scripts/lint.sh | 5 +- stubs/cmr/__init__.pyi | 10 + stubs/cmr/queries.pyi | 112 ++++++++++ tests/__init__.py | 0 tests/unit/test_auth.py | 8 +- 11 files changed, 617 insertions(+), 140 deletions(-) create mode 100644 earthaccess/typing_.py create mode 100644 stubs/cmr/__init__.pyi create mode 100644 stubs/cmr/queries.pyi create mode 100644 tests/__init__.py diff --git a/.gitignore b/.gitignore index ca8204ba..ab1f58ce 100644 --- a/.gitignore +++ b/.gitignore @@ -20,8 +20,78 @@ docs/tutorials/data tests/integration/data .ruff_cache -# OS X +notebooks/data/ + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Created by https://www.toptal.com/developers/gitignore/api/macos +# Edit at https://www.toptal.com/developers/gitignore?templates=macos + +### macOS ### +# General .DS_Store +.AppleDouble +.LSOverride -notebooks/data/ -.vscode +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +# End of https://www.toptal.com/developers/gitignore/api/macos + +# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode + +### VisualStudioCode ### +.vscode/ + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode + +# Created by https://www.toptal.com/developers/gitignore/api/direnv +# Edit at https://www.toptal.com/developers/gitignore?templates=direnv + +### direnv ### +.direnv +.envrc + +# End of https://www.toptal.com/developers/gitignore/api/direnv diff --git a/CHANGELOG.md b/CHANGELOG.md index a2002d23..fe0efb32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ * fixed 483 by extracting a common CMR query method for collections and granules using SearchAfter header * Added VCR support for verifying the API call to CMR and the parsing of returned results without relying on CMR availability post development +* Enhancements: + * Corrected and enhanced static type hints for functions and methods that make + CMR queries or handle CMR query results (#508) + ## [v0.9.0] 2024-02-28 * Bug fixes: diff --git a/earthaccess/api.py 
b/earthaccess/api.py index a7d35fb0..ab2d7b0a 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -1,5 +1,3 @@ -from typing import Any, Dict, List, Optional, Type, Union - import requests import s3fs from fsspec import AbstractFileSystem @@ -7,9 +5,10 @@ import earthaccess from .auth import Auth -from .results import DataGranule +from .results import DataCollection, DataGranule from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery from .store import Store +from .typing_ import Any, Dict, List, Optional, Union from .utils import _validation as validate @@ -28,9 +27,7 @@ def _normalize_location(location: Optional[str]) -> Optional[str]: return location -def search_datasets( - count: int = -1, **kwargs: Any -) -> List[earthaccess.results.DataCollection]: +def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: """Search datasets using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -78,9 +75,7 @@ def search_datasets( return query.get_all() -def search_data( - count: int = -1, **kwargs: Any -) -> List[earthaccess.results.DataGranule]: +def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]: """Search dataset granules using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -194,7 +189,7 @@ def download( def open( - granules: Union[List[str], List[earthaccess.results.DataGranule]], + granules: Union[List[str], List[DataGranule]], provider: Optional[str] = None, ) -> List[AbstractFileSystem]: """Returns a list of fsspec file-like objects that can be used to access files @@ -216,7 +211,7 @@ def open( def get_s3_credentials( daac: Optional[str] = None, provider: Optional[str] = None, - results: Optional[List[earthaccess.results.DataGranule]] = None, + results: Optional[List[DataGranule]] = None, ) -> Dict[str, Any]: """Returns temporary (1 hour) credentials for direct access to NASA S3 buckets. We can use the daac name, the provider, or a list of results from earthaccess.search_data(). @@ -239,7 +234,7 @@ def get_s3_credentials( return earthaccess.__auth__.get_s3_credentials(daac=daac, provider=provider) -def collection_query() -> Type[CollectionQuery]: +def collection_query() -> CollectionQuery: """Returns a query builder instance for NASA collections (datasets). Returns: @@ -252,7 +247,7 @@ def collection_query() -> Type[CollectionQuery]: return query_builder -def granule_query() -> Type[GranuleQuery]: +def granule_query() -> GranuleQuery: """Returns a query builder instance for data granules Returns: @@ -311,7 +306,7 @@ def get_requests_https_session() -> requests.Session: def get_s3fs_session( daac: Optional[str] = None, provider: Optional[str] = None, - results: Optional[earthaccess.results.DataGranule] = None, + results: Optional[DataGranule] = None, ) -> s3fs.S3FileSystem: """Returns a fsspec s3fs file session for direct access when we are in us-west-2. 
diff --git a/earthaccess/search.py b/earthaccess/search.py index 335389fe..04e1c7ac 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -1,15 +1,30 @@ import datetime as dt from inspect import getmembers, ismethod -from typing import Any, List, Optional, Tuple, Type, Union -import dateutil.parser as parser # type: ignore +import dateutil.parser as parser import requests from cmr import CollectionQuery, GranuleQuery -# type: ignore from .auth import Auth from .daac import find_provider, find_provider_by_shortname from .results import DataCollection, DataGranule +from .typing_ import ( + Any, + Dict, + List, + Never, + Optional, + Self, + Sequence, + SupportsFloat, + Tuple, + TypeAlias, + Union, + override, +) + +FloatLike: TypeAlias = Union[str, SupportsFloat] +PointLike: TypeAlias = Tuple[FloatLike, FloatLike] def get_results( @@ -32,17 +47,20 @@ def get_results( page_size = min(limit, 2000) url = query._build_url() - results: List = [] + results: List[Any] = [] more_results = True headers = dict(query.headers or {}) + while more_results: response = requests.get(url, headers=headers, params={"page_size": page_size}) - headers["cmr-search-after"] = response.headers.get("cmr-search-after") + + if cmr_search_after := query.headers.get("cmr-search-after"): + headers["cmr-search-after"] = cmr_search_after try: response.raise_for_status() except requests.exceptions.HTTPError as ex: - raise RuntimeError(ex.response.text) + raise RuntimeError(ex.response.text) from ex latest = response.json()["items"] @@ -60,7 +78,7 @@ class DataCollections(CollectionQuery): the response has to be in umm_json to use the result classes. """ - _fields = None + _fields: Optional[List[str]] = None _format = "umm_json" def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> None: @@ -71,21 +89,27 @@ def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> No for queries that need authentication, e.g. restricted datasets. """ super().__init__(*args, **kwargs) - self.session = requests.session() - if auth is not None and auth.authenticated: + + self.session = ( # To search, we need the new bearer tokens from NASA Earthdata - self.session = auth.get_session(bearer_token=True) + auth.get_session(bearer_token=True) + if auth is not None and auth.authenticated + else requests.session() + ) self._debug = False self.params["has_granules"] = True self.params["include_granule_counts"] = True - def hits(self) -> int: + @override + def hits(self) -> Union[int, Never]: """Returns the number of hits the current query will return. This is done by making a lightweight query to CMR and inspecting the returned headers. Restricted datasets will always return zero results even if there are results. + Raises: + RuntimeError: if the CMR query fails Returns: The number of results reported by CMR. @@ -97,11 +121,12 @@ def hits(self) -> int: try: response.raise_for_status() except requests.exceptions.HTTPError as ex: - raise RuntimeError(ex.response.text) + raise RuntimeError(ex.response.text) from ex return int(response.headers["CMR-Hits"]) - def get(self, limit: int = 2000) -> List[DataCollection]: + @override + def get(self, limit: int = 2000) -> Union[List[DataCollection], Never]: """Get all the collections (datasets) that match with our current parameters up to some limit, even if spanning multiple pages. 
@@ -113,16 +138,20 @@ def get(self, limit: int = 2000) -> List[DataCollection]: Parameters: limit: The number of results to return + Raises: + RuntimeError: if the CMR query fails + Returns: - query results as a list of `DataCollection` instances. + query results as a (possibly empty) list of `DataCollection` instances. """ - return list( + return [ DataCollection(collection, self._fields) for collection in get_results(self, limit) - ) + ] - def concept_id(self, IDs: List[str]) -> Type[CollectionQuery]: + @override + def concept_id(self, IDs: Sequence[str]) -> Union[Self, Never]: """Filter by concept ID. For example: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, S12345678-LPDAAC_ECS @@ -135,22 +164,30 @@ def concept_id(self, IDs: List[str]) -> Type[CollectionQuery]: Parameters: IDs: ID(s) to search by. Can be provided as a string or list of strings. + + Raises: + ValueError: if an ID does not start with a valid prefix + + Returns: + self """ - super().concept_id(IDs) - return self + return super().concept_id(IDs) - def keyword(self, text: str) -> Type[CollectionQuery]: + @override + def keyword(self, text: str) -> Self: """Case-insensitive and wildcard (*) search through over two dozen fields in a CMR collection record. This allows for searching against fields like summary and science keywords. Parameters: text: text to search for + + Returns: + self """ - super().keyword(text) - return self + return super().keyword(text) - def doi(self, doi: str) -> Type[CollectionQuery]: + def doi(self, doi: str) -> Union[Self, Never]: """Search datasets by DOI. ???+ Tip @@ -160,6 +197,12 @@ def doi(self, doi: str) -> Type[CollectionQuery]: Parameters: doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS + + Raises: + TypeError: if `doi` is not of type `str` + + Returns: + self """ if not isinstance(doi, str): raise TypeError("doi must be of type str") @@ -167,7 +210,7 @@ def doi(self, doi: str) -> Type[CollectionQuery]: self.params["doi"] = doi return self - def instrument(self, instrument: str) -> Type[CollectionQuery]: + def instrument(self, instrument: str) -> Union[Self, Never]: """Searh datasets by instrument ???+ Tip @@ -176,6 +219,12 @@ def instrument(self, instrument: str) -> Type[CollectionQuery]: Parameters: instrument (String): instrument of a datasets, e.g. instrument=GEDI + + Raises: + TypeError: if `instrument` is not of type `str` + + Returns: + self """ if not isinstance(instrument, str): raise TypeError("instrument must be of type str") @@ -183,7 +232,7 @@ def instrument(self, instrument: str) -> Type[CollectionQuery]: self.params["instrument"] = instrument return self - def project(self, project: str) -> Type[CollectionQuery]: + def project(self, project: str) -> Union[Self, Never]: """Searh datasets by associated project ???+ Tip @@ -193,6 +242,12 @@ def project(self, project: str) -> Type[CollectionQuery]: Parameters: project (String): associated project of a datasets, e.g. project=EMIT + + Raises: + TypeError: if `project` is not of type `str` + + Returns: + self """ if not isinstance(project, str): raise TypeError("project must be of type str") @@ -200,7 +255,8 @@ def project(self, project: str) -> Type[CollectionQuery]: self.params["project"] = project return self - def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: + @override + def parameters(self, **kwargs: Any) -> Union[Self, Never]: """Provide query parameters as keyword arguments. The keyword needs to match the name of the method, and the value should either be the value or a tuple of values. 
@@ -210,12 +266,16 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: temporal=("2015-01","2015-02"), point=(42.5, -101.25)) ``` + + Raises: + ValueError: if the name of a keyword argument is not the name of a method + TypeError: if the value of a keyword argument is not an argument or tuple + of arguments matching the number and type(s) of the method's parameters + Returns: - Query instance + self """ - methods = {} - for name, func in getmembers(self, predicate=ismethod): - methods[name] = func + methods = dict(getmembers(self, predicate=ismethod)) for key, val in kwargs.items(): # verify the key matches one of our methods @@ -236,25 +296,31 @@ def print_help(self, method: str = "fields") -> None: print([method for method in dir(self) if method.startswith("_") is False]) help(getattr(self, method)) - def fields(self, fields: Optional[List[str]] = None) -> Type[CollectionQuery]: + def fields(self, fields: Optional[List[str]] = None) -> Self: """Masks the response by only showing the fields included in this list. Parameters: fields (List): list of fields to show, these fields come from the UMM model e.g. Abstract, Title + + Returns: + self """ self._fields = fields return self - def debug(self, debug: bool = True) -> Type[CollectionQuery]: + def debug(self, debug: bool = True) -> Self: """If True, prints the actual query to CMR, notice that the pagination happens in the headers. Parameters: debug (Boolean): Print CMR query. + + Returns: + self """ - self._debug = True + self._debug = debug return self - def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: + def cloud_hosted(self, cloud_hosted: bool = True) -> Union[Self, Never]: """Only match granules that are hosted in the cloud. This is valid for public collections. ???+ Tip @@ -263,6 +329,12 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: Parameters: cloud_hosted: True to require granules only be online + + Raises: + TypeError: if `cloud_hosted` is not of type `bool` + + Returns: + self """ if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -273,7 +345,8 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: self.params["provider"] = provider return self - def provider(self, provider: str = "") -> Type[CollectionQuery]: + @override + def provider(self, provider: str) -> Self: """Only match collections from a given provider. A NASA datacenter or DAAC can have one or more providers. @@ -282,23 +355,32 @@ def provider(self, provider: str = "") -> Type[CollectionQuery]: Parameters: provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. + + Returns: + self """ self.params["provider"] = provider return self - def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]: - """An alias name for `daac()`. + def data_center(self, data_center_name: str) -> Self: + """An alias name for the `daac` method. Parameters: data_center_name: DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ return self.daac(data_center_name) - def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: + def daac(self, daac_short_name: str) -> Self: """Only match collections for a given DAAC, by default the on-prem collections for the DAAC. Parameters: daac_short_name: a DAAC shortname, e.g. 
NSIDC, PODAAC, GESDISC + + Returns: + self """ if "cloud_hosted" in self.params: cloud_hosted = self.params["cloud_hosted"] @@ -308,12 +390,13 @@ def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: self.params["provider"] = find_provider(daac_short_name, cloud_hosted) return self + @override def temporal( self, date_from: Optional[Union[str, dt.datetime]] = None, date_to: Optional[Union[str, dt.datetime]] = None, exclude_boundary: bool = False, - ) -> Type[CollectionQuery]: + ) -> Union[Self, Never]: """Filter by an open or closed date range. Dates can be provided as datetime objects or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls to this method before calling execute(). @@ -322,6 +405,15 @@ def temporal( date_from (String or Datetime object): earliest date of temporal range date_to (String or Datetime object): latest date of temporal range exclude_boundary (Boolean): whether or not to exclude the date_from/to in the matched range. + + Raises: + ValueError: if `date_from` or `date_to` is a non-`None` value that is + neither a datetime object nor a string that can be parsed as a datetime + object; or if `date_from` and `date_to` are both datetime objects (or + parsable as such) and `date_from` is greater than `date_to` + + Returns: + self """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None and not isinstance(date_from, dt.datetime): @@ -338,8 +430,7 @@ def temporal( print("The provided end date was not recognized") date_to = "" - super().temporal(date_from, date_to, exclude_boundary) - return self + return super().temporal(date_from, date_to, exclude_boundary) class DataGranules(GranuleQuery): @@ -350,20 +441,27 @@ class DataGranules(GranuleQuery): _format = "umm_json" - def __init__(self, auth: Any = None, *args: Any, **kwargs: Any) -> None: + def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> None: """Base class for Granule and Collection CMR queries.""" super().__init__(*args, **kwargs) - self.session = requests.session() - if auth is not None and auth.authenticated: + + self.session = ( # To search, we need the new bearer tokens from NASA Earthdata - self.session = auth.get_session(bearer_token=True) + auth.get_session(bearer_token=True) + if auth is not None and auth.authenticated + else requests.session() + ) self._debug = False - def hits(self) -> int: + @override + def hits(self) -> Union[int, Never]: """Returns the number of hits the current query will return. This is done by making a lightweight query to CMR and inspecting the returned headers. + Raises: + RuntimeError: if the CMR query fails + Returns: The number of results reported by CMR. """ @@ -382,7 +480,8 @@ def hits(self) -> int: return int(response.headers["CMR-Hits"]) - def get(self, limit: int = 2000) -> List[DataGranule]: + @override + def get(self, limit: int = 2000) -> Union[List[DataGranule], Never]: """Get all the collections (datasets) that match with our current parameters up to some limit, even if spanning multiple pages. @@ -394,16 +493,20 @@ def get(self, limit: int = 2000) -> List[DataGranule]: Parameters: limit: The number of results to return + Raises: + RuntimeError: if the CMR query fails + Returns: - query results as a list of `DataGranules` instances. + query results as a (possibly empty) list of `DataGranules` instances. 
""" response = get_results(self, limit) cloud = self._is_cloud_hosted(response[0]) - return list(DataGranule(granule, cloud_hosted=cloud) for granule in response) + return [DataGranule(granule, cloud_hosted=cloud) for granule in response] - def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: + @override + def parameters(self, **kwargs: Any) -> Union[Self, Never]: """Provide query parameters as keyword arguments. The keyword needs to match the name of the method, and the value should either be the value or a tuple of values. @@ -414,8 +517,13 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: point=(42.5, -101.25)) ``` + Raises: + ValueError: if the name of a keyword argument is not the name of a method + TypeError: if the value of a keyword argument is not an argument or tuple + of arguments matching the number and type(s) of the method's parameters + Returns: - Query instance + self """ methods = {} for name, func in getmembers(self, predicate=ismethod): @@ -434,7 +542,8 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: return self - def provider(self, provider: str = "") -> Type[CollectionQuery]: + @override + def provider(self, provider: str) -> Self: """Only match collections from a given provider. A NASA datacenter or DAAC can have one or more providers. For example, PODAAC is a data center or DAAC, @@ -443,23 +552,32 @@ def provider(self, provider: str = "") -> Type[CollectionQuery]: Parameters: provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. + + Returns: + self """ self.params["provider"] = provider return self - def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]: + def data_center(self, data_center_name: str) -> Self: """An alias name for `daac()`. Parameters: data_center_name (String): DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ return self.daac(data_center_name) - def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: + def daac(self, daac_short_name: str) -> Self: """Only match collections for a given DAAC. Default to on-prem collections for the DAAC. Parameters: daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + + Returns: + self """ if "cloud_hosted" in self.params: cloud_hosted = self.params["cloud_hosted"] @@ -469,18 +587,25 @@ def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: self.params["provider"] = find_provider(daac_short_name, cloud_hosted) return self - def orbit_number(self, orbit1: int, orbit2: int) -> Type[GranuleQuery]: + @override + def orbit_number( + self, + orbit1: FloatLike, + orbit2: Optional[FloatLike] = None, + ) -> Self: """Filter by the orbit number the granule was acquired during. Either a single orbit can be targeted or a range of orbits. Parameter: orbit1: orbit to target (lower limit of range when orbit2 is provided) orbit2: upper limit of range + + Returns: + self """ - super().orbit_number(orbit1, orbit2) - return self + return super().orbit_number(orbit1, orbit2) - def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: + def cloud_hosted(self, cloud_hosted: bool = True) -> Union[Self, Never]: """Only match granules that are hosted in the cloud. This is valid for public collections and when using the short_name parameter. Concept-Id is unambiguous. 
@@ -491,6 +616,12 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: Parameters: cloud_hosted: True to require granules only be online + + Raises: + TypeError: if `cloud_hosted` is not of type `bool` + + Returns: + self """ if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -503,7 +634,7 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: self.params["provider"] = provider return self - def granule_name(self, granule_name: str) -> Type[CollectionQuery]: + def granule_name(self, granule_name: str) -> Union[Self, Never]: """Find granules matching either granule ur or producer granule id, queries using the readable_granule_name metadata field. @@ -513,6 +644,12 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: Parameters: granule_name: granule name (accepts wildcards) + + Raises: + TypeError: if `granule_name` is not of type `str` + + Returns: + self """ if not isinstance(granule_name, str): raise TypeError("granule_name must be of type string") @@ -521,54 +658,89 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: self.params["options[readable_granule_name][pattern]"] = True return self - def online_only(self, online_only: bool = True) -> Type[GranuleQuery]: + @override + def online_only(self, online_only: bool = True) -> Union[Self, Never]: """Only match granules that are listed online and not available for download. The opposite of this method is downloadable(). Parameters: online_only: True to require granules only be online + + Raises: + TypeError: if `online_only` is not of type `bool` + + Returns: + self """ - super().online_only(online_only) - return self + return super().online_only(online_only) - def day_night_flag(self, day_night_flag: str) -> Type[GranuleQuery]: + @override + def day_night_flag(self, day_night_flag: str) -> Union[Self, Never]: """Filter by period of the day the granule was collected during. Parameters: day_night_flag: "day", "night", or "unspecified" + + Raises: + TypeError: if `day_night_flag` is not of type `str` + ValueError: if `day_night_flag` is not one of `"day"`, `"night"`, or + `"unspecified"` + + Returns: + self """ - super().day_night_flag(day_night_flag) - return self + return super().day_night_flag(day_night_flag) - def instrument(self, instrument: str = "") -> Type[GranuleQuery]: + @override + def instrument(self, instrument: str) -> Union[Self, Never]: """Filter by the instrument associated with the granule. Parameters: instrument: name of the instrument + + Raises: + ValueError: if `instrument` is not a non-empty string + + Returns: + self """ - super().instrument(instrument) - return self + return super().instrument(instrument) - def platform(self, platform: str = "") -> Type[GranuleQuery]: + @override + def platform(self, platform: str) -> Union[Self, Never]: """Filter by the satellite platform the granule came from. Parameters: platform: name of the satellite + + Raises: + ValueError: if `platform` is not a non-empty string + + Returns: + self """ - super().platform(platform) - return self + return super().platform(platform) + @override def cloud_cover( - self, min_cover: int = 0, max_cover: int = 100 - ) -> Type[GranuleQuery]: + self, + min_cover: Optional[FloatLike] = 0, + max_cover: Optional[FloatLike] = 100, + ) -> Union[Self, Never]: """Filter by the percentage of cloud cover present in the granule. 
Parameters: min_cover: minimum percentage of cloud cover max_cover: maximum percentage of cloud cover + + Raises: + ValueError: if `min_cover` or `max_cover` is not convertible to a float, + or if `min_cover` is greater than `max_cover` + + Returns: + self """ - super().cloud_cover(min_cover, max_cover) - return self + return super().cloud_cover(min_cover, max_cover) def _valid_state(self) -> bool: # spatial params must be paired with a collection limiting parameter @@ -593,33 +765,37 @@ def _is_cloud_hosted(self, granule: Any) -> bool: return True return False - def short_name(self, short_name: str = "") -> Type[GranuleQuery]: + @override + def short_name(self, short_name: str) -> Self: """Filter by short name (aka product or collection name). Parameters: short_name: name of a collection Returns: - Query instance + self """ - super().short_name(short_name) - return self + return super().short_name(short_name) - def debug(self, debug: bool = True) -> Type[GranuleQuery]: + def debug(self, debug: bool = True) -> Self: """If True, prints the actual query to CMR, notice that the pagination happens in the headers. Parameters: debug: Print CMR query. + + Returns: + self """ - self._debug = True + self._debug = debug return self + @override def temporal( self, date_from: Optional[Union[str, dt.datetime]] = None, date_to: Optional[Union[str, dt.datetime]] = None, exclude_boundary: bool = False, - ) -> Type[GranuleQuery]: + ) -> Union[Self, Never]: """Filter by an open or closed date range. Dates can be provided as a datetime objects or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls to this method before calling execute(). @@ -628,6 +804,15 @@ def temporal( date_from: earliest date of temporal range date_to: latest date of temporal range exclude_boundary: whether to exclude the date_from/to in the matched range + + Raises: + ValueError: if `date_from` or `date_to` is a non-`None` value that is + neither a datetime object nor a string that can be parsed as a datetime + object; or if `date_from` and `date_to` are both datetime objects (or + parsable as such) and `date_from` is greater than `date_to` + + Returns: + self """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None and not isinstance(date_from, dt.datetime): @@ -644,46 +829,63 @@ def temporal( print("The provided end date was not recognized") date_to = "" - super().temporal(date_from, date_to, exclude_boundary) - return self + return super().temporal(date_from, date_to, exclude_boundary) - def version(self, version: str = "") -> Type[GranuleQuery]: + @override + def version(self, version: str) -> Self: """Filter by version. Note that CMR defines this as a string. For example, MODIS version 6 products must be searched for with "006". Parameters: version: version string + + Returns: + self """ - super().version(version) - return self + return super().version(version) - def point(self, lon: str, lat: str) -> Type[GranuleQuery]: + @override + def point(self, lon: FloatLike, lat: FloatLike) -> Union[Self, Never]: """Filter by granules that include a geographic point. 
Parameters: lon (String): longitude of geographic point lat (String): latitude of geographic point + + Raises: + ValueError: if `lon` or `lat` cannot be converted to a float + + Returns: + self """ - super().point(lon, lat) - return self + return super().point(lon, lat) - def polygon(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: + @override + def polygon(self, coordinates: Sequence[PointLike]) -> Union[Self, Never]: """Filter by granules that overlap a polygonal area. Must be used in combination with a collection filtering parameter such as short_name or entry_title. Parameters: coordinates: list of (lon, lat) tuples + + Raises: + ValueError: if `coordinates` is not a sequence of at least 4 coordinate + pairs, any of the coordinates cannot be converted to a float, or the first + and last coordinate pairs are not equal + + Returns: + self """ - super().polygon(coordinates) - return self + return super().polygon(coordinates) + @override def bounding_box( self, - lower_left_lon: str, - lower_left_lat: str, - upper_right_lon: str, - upper_right_lat: str, - ) -> Type[GranuleQuery]: + lower_left_lon: FloatLike, + lower_left_lat: FloatLike, + upper_right_lon: FloatLike, + upper_right_lat: FloatLike, + ) -> Union[Self, Never]: """Filter by granules that overlap a bounding box. Must be used in combination with a collection filtering parameter such as short_name or entry_title. @@ -692,33 +894,51 @@ def bounding_box( lower_left_lat: lower left latitude of the box upper_right_lon: upper right longitude of the box upper_right_lat: upper right latitude of the box + + Raises: + ValueError: if any of the coordinates cannot be converted to a float + + Returns: + self """ - super().bounding_box( + return super().bounding_box( lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat ) - return self - def line(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: + @override + def line(self, coordinates: Sequence[PointLike]) -> Union[Self, Never]: """Filter by granules that overlap a series of connected points. Must be used in combination with a collection filtering parameter such as short_name or entry_title. Parameters: coordinates: a list of (lon, lat) tuples + + Raises: + ValueError: if `coordinates` is not a sequence of at least 2 coordinate + pairs, or any of the coordinates cannot be converted to a float + + Returns: + self """ - super().line(coordinates) - return self + return super().line(coordinates) - def downloadable(self, downloadable: bool = True) -> Type[GranuleQuery]: + @override + def downloadable(self, downloadable: bool = True) -> Union[Self, Never]: """Only match granules that are available for download. The opposite of this method is online_only(). Parameters: downloadable: True to require granules be downloadable + + Raises: + TypeError: if `downloadable` is not of type `bool` + + Returns: + self """ - super().downloadable(downloadable) - return self + return super().downloadable(downloadable) - def doi(self, doi: str) -> Type[GranuleQuery]: + def doi(self, doi: str) -> Union[Self, Never]: """Search data granules by DOI ???+ Tip @@ -726,7 +946,13 @@ def doi(self, doi: str) -> Type[GranuleQuery]: earthaccess will grab the concept_id for the query to CMR. Parameters: - doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS + doi: DOI of a dataset, e.g. 
10.5067/AQR50-3Q7CS + + Raises: + RuntimeError: if the CMR query to get the collection for the DOI fails + + Returns: + self """ collection = DataCollections().doi(doi).get() if len(collection) > 0: diff --git a/earthaccess/typing_.py b/earthaccess/typing_.py new file mode 100644 index 00000000..c8a3507f --- /dev/null +++ b/earthaccess/typing_.py @@ -0,0 +1,49 @@ +""" +Convenience module for importing types from the typing module, abstracting away +the differences between Python versions. +""" + +import sys +from typing import Any, Callable, Optional, SupportsFloat, Type, Union, cast + +if sys.version_info < (3, 9): + from typing import Dict, List, Mapping, Sequence, Tuple +else: + from builtins import dict as Dict, list as List, tuple as Tuple + from collections.abc import Mapping, Sequence + +if sys.version_info < (3, 10): + from typing_extensions import TypeAlias +else: + from typing import TypeAlias + +if sys.version_info < (3, 11): + from typing import NoReturn as Never + + from typing_extensions import Self +else: + from typing import Never, Self + +if sys.version_info < (3, 12): + from typing_extensions import override +else: + from typing import override + +__all__ = [ + "Any", + "Callable", + "Dict", + "List", + "Mapping", + "Never", + "Optional", + "Self", + "Sequence", + "SupportsFloat", + "Tuple", + "Type", + "TypeAlias", + "Union", + "cast", + "override", +] diff --git a/pyproject.toml b/pyproject.toml index 6484f32e..d069ef5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,22 +87,37 @@ build-backend = "poetry.core.masonry.api" [tool.pytest] filterwarnings = ["error::UserWarning"] - [tool.mypy] -disallow_untyped_defs = false -ignore_missing_imports = true +mypy_path = ["earthaccess", "tests", "stubs"] +disallow_untyped_defs = true +# TODO: incrementally work towards strict mode (currently too many errors) +# strict = true +pretty = true # Show additional context in error messages +enable_error_code = "redundant-self" [[tool.mypy.overrides]] module = [ "tests.*", ] -ignore_errors = true +disallow_untyped_defs = false + +[[tool.mypy.overrides]] +module = [ + "fsspec.*", + "kerchunk.*", + "pqdm.*", + "s3fs", + "tinynetrc.*", # TODO: generate stubs for tinynetrc and remove this line +] +ignore_missing_imports = true +[tool.pyright] +include = ["earthaccess"] +stubPath = "./stubs" [tool.ruff] line-length = 88 -src = ["earthaccess", "tests"] -exclude = ["mypy-stubs", "stubs", "typeshed"] +src = ["earthaccess", "stubs", "tests"] [tool.ruff.lint] extend-select = ["I"] @@ -110,7 +125,6 @@ extend-select = ["I"] [tool.ruff.lint.isort] combine-as-imports = true - [tool.bumpversion] current_version = "0.9.0" commit = false diff --git a/scripts/lint.sh b/scripts/lint.sh index 3a528811..02f9c70a 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash -set -e -set -x +set -ex -mypy earthaccess --disallow-untyped-defs +mypy earthaccess stubs tests ruff check . 
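The new `earthaccess/typing_.py` shim above, together with the `Self` and `override` names it re-exports, is what lets the fluent query methods in `search.py` be annotated as returning `Self` rather than `Type[CollectionQuery]` / `Type[GranuleQuery]` — the methods return the instance (`self`), not the class object, so the old annotations were wrong. A minimal sketch of that pattern follows; `Query` and `DataQuery` are hypothetical stand-ins for illustration (not the actual earthaccess classes), and it assumes `typing_extensions` is installed on Pythons older than 3.12:

```python
# Minimal sketch of the Self/override return-type pattern used in search.py.
# Query and DataQuery are hypothetical stand-ins, not the earthaccess classes.
import sys
from typing import Dict

if sys.version_info < (3, 11):
    from typing_extensions import Self
else:
    from typing import Self

if sys.version_info < (3, 12):
    from typing_extensions import override
else:
    from typing import override


class Query:
    def __init__(self) -> None:
        self.params: Dict[str, str] = {}

    def provider(self, provider: str) -> Self:
        # Annotating the return as Self (not Query, and not Type[Query])
        # keeps the subclass type across chained calls.
        self.params["provider"] = provider
        return self


class DataQuery(Query):
    @override
    def provider(self, provider: str) -> Self:
        # @override asks the type checker to verify that a method with a
        # compatible signature actually exists on the base class.
        return super().provider(provider)

    def cloud_hosted(self, cloud_hosted: bool = True) -> Self:
        self.params["cloud_hosted"] = str(cloud_hosted).lower()
        return self


# Chaining keeps the DataQuery type, so cloud_hosted() is still visible to
# the type checker after provider().
query = DataQuery().provider("POCLOUD").cloud_hosted()
```

With `Type[CollectionQuery]` as the declared return type, a chained call such as `DataCollections().provider("POCLOUD").cloud_hosted()` type-checked as operating on the class object and lost the subclass's methods; returning `Self` preserves both the instance-ness and the concrete subclass type.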
diff --git a/stubs/cmr/__init__.pyi b/stubs/cmr/__init__.pyi new file mode 100644 index 00000000..3ea9733e --- /dev/null +++ b/stubs/cmr/__init__.pyi @@ -0,0 +1,10 @@ +from .queries import ( + CMR_OPS as CMR_OPS, + CMR_SIT as CMR_SIT, + CMR_UAT as CMR_UAT, + CollectionQuery as CollectionQuery, + GranuleQuery as GranuleQuery, + ServiceQuery as ServiceQuery, + ToolQuery as ToolQuery, + VariableQuery as VariableQuery, +) diff --git a/stubs/cmr/queries.pyi b/stubs/cmr/queries.pyi new file mode 100644 index 00000000..41d18b53 --- /dev/null +++ b/stubs/cmr/queries.pyi @@ -0,0 +1,112 @@ +import sys +from datetime import datetime +from typing import Any, Optional, SupportsFloat, Union + +if sys.version_info < (3, 9): + from typing import List, MutableMapping, Sequence, Tuple +else: + from builtins import list as List, tuple as Tuple + from collections.abc import MutableMapping, Sequence + +if sys.version_info < (3, 10): + from typing_extensions import TypeAlias +else: + from typing import TypeAlias + +if sys.version_info < (3, 11): + from typing import NoReturn as Never + + from typing_extensions import Self +else: + from typing import Never, Self + +CMR_OPS: str +CMR_UAT: str +CMR_SIT: str + +FloatLike: TypeAlias = Union[str, SupportsFloat] +PointLike: TypeAlias = Tuple[FloatLike, FloatLike] + +class Query: + params: MutableMapping[str, Any] + options: MutableMapping[str, Any] + concept_id_chars: Sequence[str] + headers: MutableMapping[str, str] + + def __init__(self, route: str, mode: str = ...) -> None: ... + def _build_url(self) -> Union[str, Never]: ... + def get(self, limit: int = ...) -> Union[List[Any], Never]: ... + def hits(self) -> Union[int, Never]: ... + def get_all(self) -> Union[List[Any], Never]: ... + def parameters(self, **kwargs: Any) -> Self: ... + def format(self, output_format: str = "json") -> Union[Self, Never]: ... + def concept_id(self, ids: Sequence[str]) -> Union[Self, Never]: ... + def provider(self, provider: str) -> Self: ... + def mode(self, mode: str = ...) -> Union[None, Never]: ... + def token(self, token: str) -> Self: ... + def bearer_token(self, bearer_token: str) -> Self: ... + +class GranuleCollectionBaseQuery(Query): + def online_only(self, online_only: bool = True) -> Self: ... + def temporal( + self, + date_from: Optional[Union[str, datetime]], + date_to: Optional[Union[str, datetime]], + exclude_boundary: bool = False, + ) -> Union[Self, Never]: ... + def short_name(self, short_name: str) -> Self: ... + def version(self, version: str) -> Self: ... + def point(self, lon: FloatLike, lat: FloatLike) -> Self: ... + def circle( + self, lon: FloatLike, lat: FloatLike, dist: FloatLike + ) -> Union[Self, Never]: ... + def polygon(self, coordinates: Sequence[PointLike]) -> Union[Self, Never]: ... + def bounding_box( + self, + lower_left_lon: FloatLike, + lower_left_lat: FloatLike, + upper_right_lon: FloatLike, + upper_right_lat: FloatLike, + ) -> Self: ... + def line(self, coordinates: Sequence[PointLike]) -> Self: ... + def downloadable(self, downloadable: bool = True) -> Self: ... + def entry_title(self, entry_title: str) -> Self: ... + +class GranuleQuery(GranuleCollectionBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + def orbit_number( + self, + orbit1: FloatLike, + orbit2: Optional[FloatLike] = ..., + ) -> Self: ... + def day_night_flag(self, day_night_flag: str) -> Union[Self, Never]: ... + def cloud_cover( + self, + min_cover: Optional[FloatLike] = ..., + max_cover: Optional[FloatLike] = ..., + ) -> Self: ... 
+ def instrument(self, instrument: str) -> Union[Self, Never]: ... + def platform(self, platform: str) -> Union[Self, Never]: ... + def sort_key(self, sort_key: str) -> Union[Self, Never]: ... + def granule_ur(self, granule_ur: str) -> Union[Self, Never]: ... + +class CollectionQuery(GranuleCollectionBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + def archive_center(self, center: str) -> Self: ... + def keyword(self, text: str) -> Self: ... + def native_id(self, native_ids: Sequence[str]) -> Self: ... + def tool_concept_id(self, ids: Sequence[str]) -> Union[Self, Never]: ... + def service_concept_id(self, ids: Sequence[str]) -> Union[Self, Never]: ... + +class ToolServiceVariableBaseQuery(Query): + def native_id(self, native_ids: Sequence[str]) -> Self: ... + def name(self, name: str) -> Self: ... + +class ToolQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + +class ServiceQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... + +class VariableQuery(ToolServiceVariableBaseQuery): + def __init__(self, mode: str = ...) -> None: ... diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index 0c59fc86..b2b0a048 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -11,7 +11,7 @@ class TestCreateAuth(unittest.TestCase): @responses.activate @mock.patch("getpass.getpass") @mock.patch("builtins.input") - def test_auth_gets_proper_credentials(self, user_input, user_password) -> bool: + def test_auth_gets_proper_credentials(self, user_input, user_password): user_input.return_value = "user" user_password.return_value = "password" json_response = [ @@ -53,9 +53,7 @@ def test_auth_gets_proper_credentials(self, user_input, user_password) -> bool: @responses.activate @mock.patch("getpass.getpass") @mock.patch("builtins.input") - def test_auth_can_create_proper_credentials( - self, user_input, user_password - ) -> bool: + def test_auth_can_create_proper_credentials(self, user_input, user_password): user_input.return_value = "user" user_password.return_value = "password" json_response = {"access_token": "EDL-token-1", "expiration_date": "12/15/2021"} @@ -94,7 +92,7 @@ def test_auth_can_create_proper_credentials( @responses.activate @mock.patch("getpass.getpass") @mock.patch("builtins.input") - def test_auth_fails_for_wrong_credentials(self, user_input, user_password) -> bool: + def test_auth_fails_for_wrong_credentials(self, user_input, user_password): user_input.return_value = "bad_user" user_password.return_value = "bad_password" json_response = {"error": "wrong credentials"} From 57ebc9a946811a97d8db3a8f0415d67d1fa947a6 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Mon, 8 Apr 2024 12:59:12 -0400 Subject: [PATCH 02/12] Fix broken mindeps build --- ci/environment-mindeps.yaml | 2 ++ pyproject.toml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/ci/environment-mindeps.yaml b/ci/environment-mindeps.yaml index 75037c51..3416a5f6 100644 --- a/ci/environment-mindeps.yaml +++ b/ci/environment-mindeps.yaml @@ -21,7 +21,9 @@ dependencies: - responses - pytest - pytest-cov + - python-magic - mypy + - types-python-dateutil - types-requests - types-setuptools - ruff diff --git a/pyproject.toml b/pyproject.toml index d069ef5b..fdc461ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ pymdown-extensions = ">=9.2" pygments = ">=2.11.1" responses = ">=0.14" ruff = "^0.1.6" 
+types-python-dateutil = ">=2.8.2" types-requests = ">=0.1" types-setuptools = ">=0.1" ipywidgets = ">=7.7.0" @@ -104,6 +105,7 @@ disallow_untyped_defs = false [[tool.mypy.overrides]] module = [ "fsspec.*", + "dask.*", "kerchunk.*", "pqdm.*", "s3fs", From f1e62c4e98c16066feee0d3dce719cc43f1ec17b Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Tue, 9 Apr 2024 19:04:18 -0400 Subject: [PATCH 03/12] Align vcrpy usage with VCRTestCase See https://vcrpy.readthedocs.io/en/latest/usage.html#unittest-integration --- earthaccess/search.py | 4 +- poetry.lock | 17 +- pyproject.toml | 1 + ...esults.test_collections_less_than_2k.yaml} | 0 ...esults.test_collections_more_than_2k.yaml} | 0 .../TestResults.test_data_links.yaml | 466 ++++++++++++++++++ ...KM_2000.yaml => TestResults.test_get.yaml} | 0 ...estResults.test_get_all_less_than_2k.yaml} | 0 ...estResults.test_get_all_more_than_2k.yaml} | 0 ... TestResults.test_get_more_than_2000.yaml} | 0 tests/unit/test_results.py | 167 +++---- 11 files changed, 553 insertions(+), 102 deletions(-) rename tests/unit/fixtures/vcr_cassettes/{PODAAC.yaml => TestResults.test_collections_less_than_2k.yaml} (100%) rename tests/unit/fixtures/vcr_cassettes/{ALL.yaml => TestResults.test_collections_more_than_2k.yaml} (100%) create mode 100644 tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml rename tests/unit/fixtures/vcr_cassettes/{MOD02QKM_2000.yaml => TestResults.test_get.yaml} (100%) rename tests/unit/fixtures/vcr_cassettes/{TELLUS_GRAC.yaml => TestResults.test_get_all_less_than_2k.yaml} (100%) rename tests/unit/fixtures/vcr_cassettes/{CYGNSS.yaml => TestResults.test_get_all_more_than_2k.yaml} (100%) rename tests/unit/fixtures/vcr_cassettes/{MOD02QKM.yaml => TestResults.test_get_more_than_2000.yaml} (100%) diff --git a/earthaccess/search.py b/earthaccess/search.py index 04e1c7ac..0b54bcfd 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -3,6 +3,7 @@ import dateutil.parser as parser import requests + from cmr import CollectionQuery, GranuleQuery from .auth import Auth @@ -10,7 +11,6 @@ from .results import DataCollection, DataGranule from .typing_ import ( Any, - Dict, List, Never, Optional, @@ -54,7 +54,7 @@ def get_results( while more_results: response = requests.get(url, headers=headers, params={"page_size": page_size}) - if cmr_search_after := query.headers.get("cmr-search-after"): + if cmr_search_after := response.headers.get("cmr-search-after"): headers["cmr-search-after"] = cmr_search_after try: diff --git a/poetry.lock b/poetry.lock index 5ca33866..04267629 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
[[package]] name = "aiobotocore" @@ -3084,8 +3084,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3850,6 +3850,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3857,8 +3858,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash 
= "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3875,6 +3884,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3882,6 +3892,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -5109,4 +5120,4 @@ kerchunk = ["dask", "kerchunk"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "5344a948e7ae73de6bcfd7fa30089469daf6b232e3f0498cc1a47ba860ebb497" +content-hash = "9f38ee827a93f83af0b7b666499b649afa30d6c18e153fea048e0140dbe1e7eb" diff --git a/pyproject.toml b/pyproject.toml index fdc461ea..14498143 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ module = [ "pqdm.*", "s3fs", "tinynetrc.*", # TODO: generate stubs for tinynetrc and remove this line + "vcr.unittest", # TODO: generate stubs for vcr and remove this line ] ignore_missing_imports = true diff --git a/tests/unit/fixtures/vcr_cassettes/PODAAC.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_less_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/PODAAC.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_less_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/ALL.yaml 
b/tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_more_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/ALL.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_collections_more_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml new file mode 100644 index 00000000..1b9e08ac --- /dev/null +++ b/tests/unit/fixtures/vcr_cassettes/TestResults.test_data_links.yaml @@ -0,0 +1,466 @@ +interactions: +- request: + body: null + headers: + Accept: + - application/json + Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/api/users/tokens + response: + body: + string: '[{"access_token":"eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIjoiVXNlciIsInVpZCI6ImRzY2h1Y2siLCJleHAiOjE3MTcyNjM4MTMsImlhdCI6MTcxMjA3OTgxMywiaXNzIjoiRWFydGhkYXRhIExvZ2luIn0.S_tw0-5JNFEv3si07GYVxvQi81QejNAT2Sh2ZIxAwmqr9UqoSmYg2Wp2Jdn3jaWrSVsRgxBXuLD5w7XFeRju2qOtIqovN3XGJ8VnTdvpklr-gTjk_iLq58334Zzbu5ntnqy-QTzPCKvjvqr3GNuIJcp9z7j5rzd3MEUYOFP1xsd8wehGLpBHzT6ZSzCOwdgzE1AufKq9Vd2GqM_5bc3M9cj-gGy2g3m1mP2OB41wiGvPzup79ds4t_gEPkCecm2rplCP4n1hrY6ZQtXshgM6o49J1nkGSJjE0olHcPwEujKE2s1htWZEycI1TCCxrGpx8K1vwEd0lNaekgPUWwdOlA","token_type":"Bearer","expiration_date":"06/01/2024"}]' + headers: + Cache-Control: + - no-store + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + ETag: + - W/"61d0ce8df0bc684ac04ce623aea3668c" + Expires: + - Fri, 01 Jan 1990 00:00:00 GMT + Pragma: + - no-cache + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + Vary: + - Accept + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - 28f6c88b-114d-4319-b6a2-0de0f54c9405 + X-Runtime: + - '0.013338' + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/api/users/dschuck?client_id=ntD0YGC_SM3Bjs-Tnxd7bg + response: + body: + string: '{"uid":"dschuck","first_name":"Charles","last_name":"Daniels","email_address":"chuck@developmentseed.org","registered_date":" + 2 Apr 2024 17:43:33PM","country":"United States","study_area":"Other","allow_auth_app_emails":true,"user_type":"Application","affiliation":"Commercial","agreed_to_meris_eula":true,"agreed_to_sentinel_eula":true,"email_verified":true,"user_groups":[],"user_authorized_apps":23,"nams_auid":null}' + headers: + Cache-Control: + - no-store + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + ETag: + - W/"5d6f0c723c97c730432ca73084995037" + Expires: + - Fri, 01 Jan 1990 00:00:00 GMT + Pragma: + - no-cache + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + Vary: + - Accept + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - c8cf2bd3-731d-4863-8967-1906de679cbc + X-Runtime: + - '0.017383' + X-XSS-Protection: + - 1; 
mode=block + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://urs.earthdata.nasa.gov/profile + response: + body: + string: '' + headers: + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Type: + - text/html; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + Location: + - https://urs.earthdata.nasa.gov/home + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Set-Cookie: + - _urs-gui_session=abd23c587ce267b8c84ef154346028b0; path=/; expires=Wed, 10 + Apr 2024 21:58:55 GMT; HttpOnly + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - b6384698-e18b-4a99-b80c-b0ebe6cb80b7 + X-Runtime: + - '0.008282' + X-XSS-Protection: + - 1; mode=block + status: + code: 302 + message: Found +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + Cookie: + - _urs-gui_session=abd23c587ce267b8c84ef154346028b0 + method: GET + uri: https://urs.earthdata.nasa.gov/home + response: + body: + string: "\n\n\n\n\n \n + \ \n \n + \ Earthdata Login\n \n \n\n + \ \n \n \n\n \n \n \n \n \n \n\n \n\n \n + \ \n \n\n + \ \n \n + \ \n\n
+        [Earthdata Login home page markup: site header and navigation, the notice "You must be logged in to access this page", Register and username/password recovery links, the EOSDIS single sign-on registration blurb, and the NASA Web Privacy Policy / NPD2810 usage notices]
\n \n\n \n + \ \n \n + \ \n + \ \n + \ \n\n \n \n + \ \n\n \n \n\n" + headers: + Cache-Control: + - no-store + Connection: + - keep-alive + Content-Type: + - text/html; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:55 GMT + ETag: + - W/"7af405988c901d45ff1b80e3d54e85fa" + Expires: + - Fri, 01 Jan 1990 00:00:00 GMT + Pragma: + - no-cache + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx/1.22.1 + Set-Cookie: + - _urs-gui_session=abd23c587ce267b8c84ef154346028b0; path=/; expires=Wed, 10 + Apr 2024 21:58:55 GMT; HttpOnly + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + Vary: + - Accept + X-Content-Type-Options: + - nosniff + X-Download-Options: + - noopen + X-Frame-Options: + - SAMEORIGIN + X-Permitted-Cross-Domain-Policies: + - none + X-Request-Id: + - a5f4494f-83d0-40cc-a966-a45d47155163 + X-Runtime: + - '0.012801' + X-XSS-Protection: + - 1; mode=block + content-length: + - '9058' + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205&temporal%5B%5D=2020-01-01T00:00:00Z,2022-01-01T00:00:00Z&page_size=0 + response: + body: + string: '{"hits":147,"took":55,"items":[]}' + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - CMR-Hits, CMR-Request-Id, X-Request-Id, CMR-Scroll-Id, CMR-Search-After, CMR-Timed-Out, + CMR-Shapefile-Original-Point-Count, CMR-Shapefile-Simplified-Point-Count + CMR-Hits: + - '147' + CMR-Request-Id: + - c8f9b01d-a6a3-4809-8a65-e707f30b3d47 + CMR-Took: + - '55' + Connection: + - keep-alive + Content-MD5: + - 376935be7b2a0e96352603908fe0dcd5 + Content-SHA1: + - b02f8a240f36ad8cd6798334ce2455c338e6d55f + Content-Type: + - application/vnd.nasa.cmr.umm_results+json;version=1.6.5; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:56 GMT + Server: + - ServerTokens ProductOnly + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Vary: + - Accept-Encoding, User-Agent + Via: + - 1.1 f300b5f0c0ff51593fb31953294424c0.cloudfront.net (CloudFront) + X-Amz-Cf-Id: + - 6UKqbnR1dCiMWGTDKFtLjE_KomjiZXUhp0ICfljulkAzPCeqJgb3cw== + X-Amz-Cf-Pop: + - PHL51-P1 + X-Cache: + - Miss from cloudfront + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Request-Id: + - 6UKqbnR1dCiMWGTDKFtLjE_KomjiZXUhp0ICfljulkAzPCeqJgb3cw== + X-XSS-Protection: + - 1; mode=block + content-length: + - '33' + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Connection: + - keep-alive + method: GET + uri: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205&temporal%5B%5D=2020-01-01T00:00:00Z,2022-01-01T00:00:00Z&page_size=1 + response: + body: + string: 
'{"hits":147,"took":253,"items":[{"meta":{"concept-type":"granule","concept-id":"G2546526969-POCLOUD","revision-id":2,"native-id":"ssh_grids_v2205_2020010212","collection-concept-id":"C2270392799-POCLOUD","provider-id":"POCLOUD","format":"application/vnd.nasa.cmr.umm+json","revision-date":"2023-01-11T00:16:28.862Z"},"umm":{"TemporalExtent":{"RangeDateTime":{"EndingDateTime":"2020-01-02T00:00:00.000Z","BeginningDateTime":"2020-01-02T00:00:00.000Z"}},"MetadataSpecification":{"URL":"https://cdn.earthdata.nasa.gov/umm/granule/v1.6.5","Name":"UMM-G","Version":"1.6.5"},"GranuleUR":"ssh_grids_v2205_2020010212","ProviderDates":[{"Type":"Insert","Date":"2023-01-11T00:16:13.878Z"},{"Type":"Update","Date":"2023-01-11T00:16:13.878Z"}],"SpatialExtent":{"HorizontalSpatialDomain":{"Geometry":{"BoundingRectangles":[{"WestBoundingCoordinate":0.083,"SouthBoundingCoordinate":-79.917,"EastBoundingCoordinate":180,"NorthBoundingCoordinate":79.917},{"WestBoundingCoordinate":-180,"SouthBoundingCoordinate":-79.917,"EastBoundingCoordinate":-0.083,"NorthBoundingCoordinate":79.917}]}}},"DataGranule":{"ArchiveAndDistributionInformation":[{"SizeUnit":"MB","Size":9.246453285217285,"Checksum":{"Value":"9002febf17632e5921eba5b8f62237e6","Algorithm":"MD5"},"SizeInBytes":9695609,"Name":"ssh_grids_v2205_2020010212.nc"},{"SizeUnit":"MB","Size":6.008148193359375E-5,"Checksum":{"Value":"b0c271019f89f876b2d3c0a9c46b8f77","Algorithm":"MD5"},"SizeInBytes":63,"Name":"ssh_grids_v2205_2020010212.nc.md5"}],"DayNightFlag":"Unspecified","ProductionDateTime":"2022-10-30T20:57:22.377Z"},"CollectionReference":{"Version":"2205","ShortName":"SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205"},"RelatedUrls":[{"URL":"https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-public/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_2020010212.nc.md5","Description":"Download + ssh_grids_v2205_2020010212.nc.md5","Type":"EXTENDED METADATA"},{"URL":"s3://podaac-ops-cumulus-public/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_2020010212.nc.md5","Description":"This + link provides direct download access via S3 to the granule","Type":"EXTENDED + METADATA"},{"URL":"https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_2020010212.nc","Description":"Download + ssh_grids_v2205_2020010212.nc","Type":"GET DATA"},{"URL":"s3://podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_2020010212.nc","Description":"This + link provides direct download access via S3 to the granule","Type":"GET DATA + VIA DIRECT ACCESS"},{"URL":"https://archive.podaac.earthdata.nasa.gov/s3credentials","Description":"api + endpoint to retrieve temporary credentials valid for same-region direct s3 + access","Type":"VIEW RELATED INFORMATION"},{"URL":"https://opendap.earthdata.nasa.gov/collections/C2270392799-POCLOUD/granules/ssh_grids_v2205_2020010212","Type":"USE + SERVICE API","Subtype":"OPENDAP DATA","Description":"OPeNDAP request URL"}]}}]}' + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - CMR-Hits, CMR-Request-Id, X-Request-Id, CMR-Scroll-Id, CMR-Search-After, CMR-Timed-Out, + CMR-Shapefile-Original-Point-Count, CMR-Shapefile-Simplified-Point-Count + CMR-Hits: + - '147' + CMR-Request-Id: + - 59762a2f-23b9-45a2-a99b-be84748be0e5 + CMR-Search-After: + - '["pocloud",1577923200000,2546526969]' + CMR-Took: + - '254' + Connection: + - 
keep-alive + Content-MD5: + - 53cf1a1f972e393a32e3cd15ec36f700 + Content-SHA1: + - 60c47f03299dda570448eaecfbd8b3ed10f7bd5a + Content-Type: + - application/vnd.nasa.cmr.umm_results+json;version=1.6.5; charset=utf-8 + Date: + - Tue, 09 Apr 2024 21:58:56 GMT + Server: + - ServerTokens ProductOnly + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + Vary: + - Accept-Encoding, User-Agent + Via: + - 1.1 f300b5f0c0ff51593fb31953294424c0.cloudfront.net (CloudFront) + X-Amz-Cf-Id: + - N9NqPFJ1jWMYiNKZDyuZ71_auV8xRiEL-06uHzn3q2PrTU66FCOKFA== + X-Amz-Cf-Pop: + - PHL51-P1 + X-Cache: + - Miss from cloudfront + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Request-Id: + - N9NqPFJ1jWMYiNKZDyuZ71_auV8xRiEL-06uHzn3q2PrTU66FCOKFA== + X-XSS-Protection: + - 1; mode=block + content-length: + - '3092' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/unit/fixtures/vcr_cassettes/MOD02QKM_2000.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_get.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/MOD02QKM_2000.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_get.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/TELLUS_GRAC.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_get_all_less_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/TELLUS_GRAC.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_get_all_less_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/CYGNSS.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_get_all_more_than_2k.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/CYGNSS.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_get_all_more_than_2k.yaml diff --git a/tests/unit/fixtures/vcr_cassettes/MOD02QKM.yaml b/tests/unit/fixtures/vcr_cassettes/TestResults.test_get_more_than_2000.yaml similarity index 100% rename from tests/unit/fixtures/vcr_cassettes/MOD02QKM.yaml rename to tests/unit/fixtures/vcr_cassettes/TestResults.test_get_more_than_2000.yaml diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 06f8256d..2113ada5 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -1,26 +1,15 @@ import logging -import unittest import earthaccess -import vcr from earthaccess.search import DataCollections -my_vcr = vcr.VCR( - record_mode="once", - decode_compressed_response=True, - # Header matching is not set by default, we need that to test the - # search-after functionality is performing correctly. - match_on=["method", "scheme", "host", "port", "path", "query", "headers"], -) +from vcr.unittest import VCRTestCase # type: ignore[import-untyped] logging.basicConfig() -vcr_log = logging.getLogger("vcr") -vcr_log.setLevel(logging.ERROR) +logging.getLogger("vcr").setLevel(logging.ERROR) -headers_to_filters = ["authorization", "Set-Cookie", "User-Agent", "Accept-Encoding"] - -def assert_unique_results(results): +def unique_results(results): """ When we invoke a search request multiple times we want to ensure that we don't get the same results back. 
This is a one shot test as the results are preserved @@ -30,7 +19,31 @@ def assert_unique_results(results): return len(unique_concept_ids) == len(results) -class TestResults(unittest.TestCase): +class TestResults(VCRTestCase): + def _get_vcr(self, **kwargs): + myvcr = super(TestResults, self)._get_vcr(**kwargs) + myvcr.cassette_library_dir = "tests/unit/fixtures/vcr_cassettes" + myvcr.decode_compressed_response = True + # Header matching is not set by default, we need that to test the + # search-after functionality is performing correctly. + myvcr.match_on = [ + "method", + "scheme", + "host", + "port", + "path", + "query", + "headers", + ] + myvcr.filter_headers = [ + "Accept-Encoding", + "Authorization", + "Set-Cookie", + "User-Agent", + ] + + return myvcr + def test_data_links(self): granules = earthaccess.search_data( short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205", @@ -58,18 +71,12 @@ def test_get_more_than_2000(self): then we expect multiple invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/MOD02QKM.yaml", - filter_headers=headers_to_filters, - ) as cass: - granules = earthaccess.search_data(short_name="MOD02QKM", count=3000) - - self.assertEqual(len(granules), 4000) + granules = earthaccess.search_data(short_name="MOD02QKM", count=3000) - # Assert that we performed one 'hits' search and two 'results' search queries - self.assertEqual(len(cass), 3) - - assert_unique_results(granules) + # Assert that we performed one 'hits' search and two 'results' search queries + self.assertEqual(len(self.cassette), 3) + self.assertEqual(len(granules), 4000) + self.assertTrue(unique_results(granules)) def test_get(self): """ @@ -77,18 +84,12 @@ def test_get(self): to get the maximum no. 
of granules from a single CMR call (2000) in a single request """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/MOD02QKM_2000.yaml", - filter_headers=headers_to_filters, - ) as cass: - granules = earthaccess.search_data(short_name="MOD02QKM", count=2000) - - self.assertEqual(len(granules), 2000) - - # Assert that we performed one 'hits' search and one 'results' search queries - self.assertEqual(len(cass), 2) + granules = earthaccess.search_data(short_name="MOD02QKM", count=2000) - assert_unique_results(granules) + # Assert that we performed one 'hits' search and one 'results' search queries + self.assertEqual(len(self.cassette), 2) + self.assertEqual(len(granules), 2000) + self.assertTrue(unique_results(granules)) def test_get_all_less_than_2k(self): """ @@ -96,20 +97,14 @@ def test_get_all_less_than_2k(self): invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/TELLUS_GRAC.yaml", - filter_headers=headers_to_filters, - ) as cass: - granules = earthaccess.search_data( - short_name="TELLUS_GRAC_L3_JPL_RL06_LND_v04", count=2000 - ) - - self.assertEqual(len(granules), 163) - - # Assert that we performed a hits query and one search results query - self.assertEqual(len(cass), 2) + granules = earthaccess.search_data( + short_name="TELLUS_GRAC_L3_JPL_RL06_LND_v04", count=2000 + ) - assert_unique_results(granules) + # Assert that we performed a hits query and one search results query + self.assertEqual(len(self.cassette), 2) + self.assertEqual(len(granules), 163) + self.assertTrue(unique_results(granules)) def test_get_all_more_than_2k(self): """ @@ -117,20 +112,14 @@ def test_get_all_more_than_2k(self): invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/CYGNSS.yaml", - filter_headers=headers_to_filters, - ) as cass: - granules = earthaccess.search_data( - short_name="CYGNSS_NOAA_L2_SWSP_25KM_V1.2", count=3000 - ) - - self.assertEqual(len(granules), 2520) - - # Assert that we performed a hits query and two search results queries - self.assertEqual(len(cass), 3) + granules = earthaccess.search_data( + short_name="CYGNSS_NOAA_L2_SWSP_25KM_V1.2", count=3000 + ) - assert_unique_results(granules) + # Assert that we performed a hits query and two search results queries + self.assertEqual(len(self.cassette), 3) + self.assertEqual(len(granules), 2520) + self.assertTrue(unique_results(granules)) def test_collections_less_than_2k(self): """ @@ -138,21 +127,14 @@ def test_collections_less_than_2k(self): invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/PODAAC.yaml", - filter_headers=headers_to_filters, - ) as cass: - query = DataCollections().daac("PODAAC").cloud_hosted(True) - collections = query.get(20) - - self.assertEqual(len(collections), 20) - - # Assert that we performed a single search results query - self.assertEqual(len(cass), 1) + query = DataCollections().daac("PODAAC").cloud_hosted(True) + collections = query.get(20) - assert_unique_results(collections) - - self.is_using_search_after(cass) + # Assert that we performed a single search results query + self.assertEqual(len(self.cassette), 1) + self.assertEqual(len(collections), 20) + self.assertTrue(unique_results(collections)) + self.assert_is_using_search_after(self.cassette) def test_collections_more_than_2k(self): """ 
@@ -160,30 +142,21 @@ def test_collections_more_than_2k(self): invocations of a cmr granule search and to not fetch back more results than we ask for """ - with my_vcr.use_cassette( - "tests/unit/fixtures/vcr_cassettes/ALL.yaml", - filter_headers=headers_to_filters, - ) as cass: - query = DataCollections() - collections = query.get(3000) - - self.assertEqual(len(collections), 4000) - - # Assert that we performed two search results queries - self.assertEqual(len(cass), 2) + query = DataCollections() + collections = query.get(3000) - assert_unique_results(collections) + # Assert that we performed two search results queries + self.assertEqual(len(self.cassette), 2) + self.assertEqual(len(collections), 4000) + self.assertTrue(unique_results(collections)) + self.assert_is_using_search_after(self.cassette) - self.is_using_search_after(cass) - - def is_using_search_after(self, cass): - # Verify the page no. was not used + def assert_is_using_search_after(self, cass): first_request = True + for request in cass.requests: + # Verify the page number was not used self.assertTrue("page_num" not in request.uri) # Verify that Search After was used in all requests except first - if first_request: - self.assertFalse("CMR-Search-After" in request.headers) - else: - self.assertTrue("CMR-Search-After" in request.headers) + self.assertEqual(first_request, "CMR-Search-After" not in request.headers) first_request = False From e7e648192844c57842871f1dde0bfe9f4df3b311 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 23:24:17 +0000 Subject: [PATCH 04/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit/test_results.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 2113ada5..6ea5071e 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -2,7 +2,6 @@ import earthaccess from earthaccess.search import DataCollections - from vcr.unittest import VCRTestCase # type: ignore[import-untyped] logging.basicConfig() From 919dbfab7944e568b995657ed4fad4dc484512eb Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Fri, 12 Apr 2024 13:11:56 -0400 Subject: [PATCH 05/12] Add `py.typed` since we include type hints --- earthaccess/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 earthaccess/py.typed diff --git a/earthaccess/py.typed b/earthaccess/py.typed new file mode 100644 index 00000000..e69de29b From b8b0a2ac7a21c93146a313b311d9d414b2d0bf3b Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sat, 13 Apr 2024 10:49:57 -0400 Subject: [PATCH 06/12] Cleanup return types and docstrings - wrap docstrings at 88 characters per ruff configuration - remove `Never` from return types where method raises only for invalid input types or values - add missing `Never` return types where method raises for server-side errors --- earthaccess/api.py | 19 +- earthaccess/search.py | 401 +++++++++++++++++++++++------------------- 2 files changed, 233 insertions(+), 187 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index ab2d7b0a..bd5c100b 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -8,7 +8,7 @@ from .results import DataCollection, DataGranule from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery from .store import Store -from .typing_ import Any, Dict, List, Optional, Union +from .typing_ import Any, Dict, List, Never, Optional, Union 
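(A note on the `Never` convention this commit message describes: `Never` is the bottom type, so a type checker reads `Union[List[DataCollection], Never]` as plain `List[DataCollection]`; the extra member only documents that the call may raise for a server-side failure instead of returning. The following is a minimal, hypothetical sketch of that reading; the function name and body are stand-ins, not code from this patch.)

```python
from typing import List, Union

from typing_extensions import Never  # `typing.Never` itself needs Python >= 3.11


def search_sketch(count: int = -1) -> Union[List[dict], Never]:
    """Return results, or raise if the (pretend) CMR request fails."""
    cmr_query_succeeded = False  # stand-in for a real HTTP round trip
    if not cmr_query_succeeded:
        # This branch is the `Never` half of the union: it produces no value
        # and leaves the function only by raising.
        raise RuntimeError("The CMR query failed")
    return []
```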
from .utils import _validation as validate @@ -27,7 +27,9 @@ def _normalize_location(location: Optional[str]) -> Optional[str]: return location -def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: +def search_datasets( + count: int = -1, **kwargs: Any +) -> Union[List[DataCollection], Never]: """Search datasets using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -51,6 +53,9 @@ def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: A list of DataCollection results that can be used to get information about a dataset, e.g. concept_id, doi, etc. + Raises: + RuntimeError: The CMR query failed. + Examples: ```python datasets = earthaccess.search_datasets( @@ -75,7 +80,7 @@ def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: return query.get_all() -def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]: +def search_data(count: int = -1, **kwargs: Any) -> Union[List[DataGranule], Never]: """Search dataset granules using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -99,6 +104,9 @@ def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]: a list of DataGranules that can be used to access the granule files by using `download()` or `open()`. + Raises: + RuntimeError: The CMR query failed. + Examples: ```python datasets = earthaccess.search_data( @@ -158,7 +166,7 @@ def download( local_path: Optional[str], provider: Optional[str] = None, threads: int = 8, -) -> List[str]: +) -> Union[List[str], Never]: """Retrieves data granules from a remote storage system. * If we run this in the cloud, we will be using S3 to move data to `local_path`. @@ -173,6 +181,9 @@ def download( Returns: List of downloaded files + + Raises: + Exception: A file download failed. """ provider = _normalize_location(provider) if isinstance(granules, DataGranule): diff --git a/earthaccess/search.py b/earthaccess/search.py index 0b54bcfd..d8d5e8c7 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -29,19 +29,23 @@ def get_results( query: Union[CollectionQuery, GranuleQuery], limit: int = 2000 -) -> List[Any]: +) -> Union[List[Any], Never]: """ Get all results up to some limit, even if spanning multiple pages. ???+ Tip - The default page size is 2000, if the supplied value is greater then the Search-After header - will be used to iterate across multiple requests until either the limit has been reached - or there are no more results. + The default page size is 2000, if the supplied value is greater then the + Search-After header will be used to iterate across multiple requests until + either the limit has been reached or there are no more results. + Parameters: limit: The number of results to return Returns: query results as a list + + Raises: + RuntimeError: The CMR query failed. """ page_size = min(limit, 2000) @@ -74,7 +78,8 @@ def get_results( class DataCollections(CollectionQuery): """ ???+ Info - The DataCollection class queries against https://cmr.earthdata.nasa.gov/search/collections.umm_json, + The DataCollection class queries against + https://cmr.earthdata.nasa.gov/search/collections.umm_json, the response has to be in umm_json to use the result classes. 
""" @@ -82,7 +87,7 @@ class DataCollections(CollectionQuery): _format = "umm_json" def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> None: - """Builds an instance of DataCollections to query CMR + """Builds an instance of DataCollections to query the CMR. Parameters: auth: An authenticated `Auth` instance. This is an optional parameter @@ -108,11 +113,11 @@ def hits(self) -> Union[int, Never]: making a lightweight query to CMR and inspecting the returned headers. Restricted datasets will always return zero results even if there are results. - Raises: - RuntimeError: if the CMR query fails - Returns: - The number of results reported by CMR. + The number of results reported by the CMR. + + Raises: + RuntimeError: The CMR query failed. """ url = self._build_url() @@ -131,18 +136,19 @@ def get(self, limit: int = 2000) -> Union[List[DataCollection], Never]: up to some limit, even if spanning multiple pages. ???+ Tip - The default page size is 2000, we need to be careful with the request size because all the JSON - elements will be loaded into memory. This is more of an issue with granules than collections as - they can be potentially millions of them. + The default page size is 2000, we need to be careful with the request size + because all the JSON elements will be loaded into memory. This is more of an + issue with granules than collections as they can be potentially millions of + them. Parameters: limit: The number of results to return - Raises: - RuntimeError: if the CMR query fails - Returns: - query results as a (possibly empty) list of `DataCollection` instances. + Query results as a (possibly empty) list of `DataCollection` instances. + + Raises: + RuntimeError: The CMR query failed. """ return [ @@ -151,25 +157,28 @@ def get(self, limit: int = 2000) -> Union[List[DataCollection], Never]: ] @override - def concept_id(self, IDs: Sequence[str]) -> Union[Self, Never]: + def concept_id(self, IDs: Sequence[str]) -> Self: """Filter by concept ID. - For example: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, S12345678-LPDAAC_ECS + + For example: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, + S12345678-LPDAAC_ECS Collections, granules, tools, services are uniquely identified with this ID. - > - * If providing a collection's concept ID here, it will filter by granules associated with that collection. - * If providing a granule's concept ID here, it will uniquely identify those granules. - * If providing a tool's concept ID here, it will uniquely identify those tools. - * If providing a service's concept ID here, it will uniquely identify those services. + + * If providing a collection's concept ID, it will filter by granules associated + with that collection. + * If providing a granule's concept ID, it will uniquely identify those granules. + * If providing a tool's concept ID, it will uniquely identify those tools. + * If providing a service's concept ID, it will uniquely identify those services. Parameters: IDs: ID(s) to search by. Can be provided as a string or list of strings. - Raises: - ValueError: if an ID does not start with a valid prefix - Returns: self + + Raises: + ValueError: An ID does not start with a valid prefix. """ return super().concept_id(IDs) @@ -187,7 +196,7 @@ def keyword(self, text: str) -> Self: """ return super().keyword(text) - def doi(self, doi: str) -> Union[Self, Never]: + def doi(self, doi: str) -> Self: """Search datasets by DOI. 
???+ Tip @@ -198,11 +207,11 @@ def doi(self, doi: str) -> Union[Self, Never]: Parameters: doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS - Raises: - TypeError: if `doi` is not of type `str` - Returns: self + + Raises: + TypeError: `doi` is not of type `str`. """ if not isinstance(doi, str): raise TypeError("doi must be of type str") @@ -210,8 +219,8 @@ def doi(self, doi: str) -> Union[Self, Never]: self.params["doi"] = doi return self - def instrument(self, instrument: str) -> Union[Self, Never]: - """Searh datasets by instrument + def instrument(self, instrument: str) -> Self: + """Searh datasets by instrument. ???+ Tip Not all datasets have an associated instrument. This works @@ -220,11 +229,11 @@ def instrument(self, instrument: str) -> Union[Self, Never]: Parameters: instrument (String): instrument of a datasets, e.g. instrument=GEDI - Raises: - TypeError: if `instrument` is not of type `str` - Returns: self + + Raises: + TypeError: `instrument` is not of type `str`. """ if not isinstance(instrument, str): raise TypeError("instrument must be of type str") @@ -232,8 +241,8 @@ def instrument(self, instrument: str) -> Union[Self, Never]: self.params["instrument"] = instrument return self - def project(self, project: str) -> Union[Self, Never]: - """Searh datasets by associated project + def project(self, project: str) -> Self: + """Searh datasets by associated project. ???+ Tip Not all datasets have an associated project. This works @@ -243,11 +252,11 @@ def project(self, project: str) -> Union[Self, Never]: Parameters: project (String): associated project of a datasets, e.g. project=EMIT - Raises: - TypeError: if `project` is not of type `str` - Returns: self + + Raises: + TypeError: `project` is not of type `str`. """ if not isinstance(project, str): raise TypeError("project must be of type str") @@ -256,24 +265,26 @@ def project(self, project: str) -> Union[Self, Never]: return self @override - def parameters(self, **kwargs: Any) -> Union[Self, Never]: + def parameters(self, **kwargs: Any) -> Self: """Provide query parameters as keyword arguments. The keyword needs to match the name of the method, and the value should either be the value or a tuple of values. ???+ Example ```python - query = DataCollections.parameters(short_name="AST_L1T", - temporal=("2015-01","2015-02"), - point=(42.5, -101.25)) + query = DataCollections.parameters( + short_name="AST_L1T", + temporal=("2015-01","2015-02"), + point=(42.5, -101.25) + ) ``` - Raises: - ValueError: if the name of a keyword argument is not the name of a method - TypeError: if the value of a keyword argument is not an argument or tuple - of arguments matching the number and type(s) of the method's parameters - Returns: self + + Raises: + ValueError: The name of a keyword argument is not the name of a method. + TypeError: The value of a keyword argument is not an argument or tuple + of arguments matching the number and type(s) of the method's parameters. """ methods = dict(getmembers(self, predicate=ismethod)) @@ -300,7 +311,8 @@ def fields(self, fields: Optional[List[str]] = None) -> Self: """Masks the response by only showing the fields included in this list. Parameters: - fields (List): list of fields to show, these fields come from the UMM model e.g. Abstract, Title + fields (List): list of fields to show. These fields come from the UMM model + (e.g. Abstract, Title). 
Returns: self @@ -309,10 +321,11 @@ def fields(self, fields: Optional[List[str]] = None) -> Self: return self def debug(self, debug: bool = True) -> Self: - """If True, prints the actual query to CMR, notice that the pagination happens in the headers. + """If True, prints the actual query to CMR. Note that the pagination happens in + the headers. Parameters: - debug (Boolean): Print CMR query. + debug (Boolean): If `True`, print the CMR query. Returns: self @@ -320,21 +333,22 @@ def debug(self, debug: bool = True) -> Self: self._debug = debug return self - def cloud_hosted(self, cloud_hosted: bool = True) -> Union[Self, Never]: - """Only match granules that are hosted in the cloud. This is valid for public collections. + def cloud_hosted(self, cloud_hosted: bool = True) -> Self: + """Only match granules that are hosted in the cloud. This is valid for public + collections. ???+ Tip Cloud hosted collections can be public or restricted. Restricted collections will not be matched using this parameter Parameters: - cloud_hosted: True to require granules only be online - - Raises: - TypeError: if `cloud_hosted` is not of type `bool` + cloud_hosted: If `True`, obtain only cloud-hosted collections. Returns: self + + Raises: + TypeError: `cloud_hosted` is not of type `bool`. """ if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -350,8 +364,8 @@ def provider(self, provider: str) -> Self: """Only match collections from a given provider. A NASA datacenter or DAAC can have one or more providers. - E.g., PODAAC is a data center or DAAC; PODAAC is the default provider for on-premises data, - POCLOUD is the PODAAC provider for their data in the cloud. + E.g., PODAAC is a data center or DAAC; PODAAC is the default provider for + on-premises data, POCLOUD is the PODAAC provider for their data in the cloud. Parameters: provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. @@ -363,7 +377,7 @@ def provider(self, provider: str) -> Self: return self def data_center(self, data_center_name: str) -> Self: - """An alias name for the `daac` method. + """An alias for the `daac` method. Parameters: data_center_name: DAAC shortname, e.g. NSIDC, PODAAC, GESDISC @@ -374,7 +388,8 @@ def data_center(self, data_center_name: str) -> Self: return self.daac(data_center_name) def daac(self, daac_short_name: str) -> Self: - """Only match collections for a given DAAC, by default the on-prem collections for the DAAC. + """Only match collections for a given DAAC, by default the on-prem collections + for the DAAC. Parameters: daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC @@ -396,24 +411,25 @@ def temporal( date_from: Optional[Union[str, dt.datetime]] = None, date_to: Optional[Union[str, dt.datetime]] = None, exclude_boundary: bool = False, - ) -> Union[Self, Never]: - """Filter by an open or closed date range. Dates can be provided as datetime objects - or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls - to this method before calling execute(). + ) -> Self: + """Filter by an open or closed date range. Dates can be provided as datetime + objects or ISO 8601 formatted strings. Multiple ranges can be provided by + successive calls to this method before calling execute(). Parameters: date_from (String or Datetime object): earliest date of temporal range date_to (String or Datetime object): latest date of temporal range - exclude_boundary (Boolean): whether or not to exclude the date_from/to in the matched range. 
- - Raises: - ValueError: if `date_from` or `date_to` is a non-`None` value that is - neither a datetime object nor a string that can be parsed as a datetime - object; or if `date_from` and `date_to` are both datetime objects (or - parsable as such) and `date_from` is greater than `date_to` + exclude_boundary (Boolean): whether or not to exclude the date_from/to in + the matched range. Returns: self + + Raises: + ValueError: `date_from` or `date_to` is a non-`None` value that is + neither a datetime object nor a string that can be parsed as a datetime + object; or `date_from` and `date_to` are both datetime objects (or + parsable as such) and `date_from` is after `date_to`. """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None and not isinstance(date_from, dt.datetime): @@ -457,13 +473,14 @@ def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> No @override def hits(self) -> Union[int, Never]: """Returns the number of hits the current query will return. - This is done by making a lightweight query to CMR and inspecting the returned headers. - - Raises: - RuntimeError: if the CMR query fails + This is done by making a lightweight query to CMR and inspecting the returned + headers. Returns: - The number of results reported by CMR. + The number of results reported by the CMR. + + Raises: + RuntimeError: The CMR query failed. """ url = self._build_url() @@ -486,18 +503,19 @@ def get(self, limit: int = 2000) -> Union[List[DataGranule], Never]: up to some limit, even if spanning multiple pages. ???+ Tip - The default page size is 2000, we need to be careful with the request size because all the JSON - elements will be loaded into memory. This is more of an issue with granules than collections as - they can be potentially millions of them. + The default page size is 2000, we need to be careful with the request size + because all the JSON elements will be loaded into memory. This is more of an + issue with granules than collections as they can be potentially millions of + them. Parameters: - limit: The number of results to return - - Raises: - RuntimeError: if the CMR query fails + limit: The number of results to return. Returns: - query results as a (possibly empty) list of `DataGranules` instances. + Query results as a (possibly empty) list of `DataGranules` instances. + + Raises: + RuntimeError: The CMR query failed. """ response = get_results(self, limit) @@ -506,24 +524,27 @@ def get(self, limit: int = 2000) -> Union[List[DataGranule], Never]: return [DataGranule(granule, cloud_hosted=cloud) for granule in response] @override - def parameters(self, **kwargs: Any) -> Union[Self, Never]: - """Provide query parameters as keyword arguments. The keyword needs to match the name - of the method, and the value should either be the value or a tuple of values. + def parameters(self, **kwargs: Any) -> Self: + """Provide query parameters as keyword arguments. The keyword needs to match the + name of the method, and the value should either be the value or a tuple of + values. 
???+ Example ```python - query = DataCollections.parameters(short_name="AST_L1T", - temporal=("2015-01","2015-02"), - point=(42.5, -101.25)) + query = DataCollections.parameters( + short_name="AST_L1T", + temporal=("2015-01","2015-02"), + point=(42.5, -101.25) + ) ``` - Raises: - ValueError: if the name of a keyword argument is not the name of a method - TypeError: if the value of a keyword argument is not an argument or tuple - of arguments matching the number and type(s) of the method's parameters - Returns: self + + Raises: + ValueError: The name of a keyword argument is not the name of a method. + TypeError: The value of a keyword argument is not an argument or tuple + of arguments matching the number and type(s) of the method's parameters. """ methods = {} for name, func in getmembers(self, predicate=ismethod): @@ -560,7 +581,7 @@ def provider(self, provider: str) -> Self: return self def data_center(self, data_center_name: str) -> Self: - """An alias name for `daac()`. + """An alias for the `daac` method. Parameters: data_center_name (String): DAAC shortname, e.g. NSIDC, PODAAC, GESDISC @@ -571,7 +592,8 @@ def data_center(self, data_center_name: str) -> Self: return self.daac(data_center_name) def daac(self, daac_short_name: str) -> Self: - """Only match collections for a given DAAC. Default to on-prem collections for the DAAC. + """Only match collections for a given DAAC. Default to on-prem collections for + the DAAC. Parameters: daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC @@ -605,7 +627,7 @@ def orbit_number( """ return super().orbit_number(orbit1, orbit2) - def cloud_hosted(self, cloud_hosted: bool = True) -> Union[Self, Never]: + def cloud_hosted(self, cloud_hosted: bool = True) -> Self: """Only match granules that are hosted in the cloud. This is valid for public collections and when using the short_name parameter. Concept-Id is unambiguous. @@ -615,13 +637,13 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Union[Self, Never]: Restricted collections will not be matched using this parameter. Parameters: - cloud_hosted: True to require granules only be online - - Raises: - TypeError: if `cloud_hosted` is not of type `bool` + cloud_hosted: If `True`, obtain only granules from cloud-hosted collections. Returns: self + + Raises: + TypeError: `cloud_hosted` is not of type `bool`. """ if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -634,7 +656,7 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Union[Self, Never]: self.params["provider"] = provider return self - def granule_name(self, granule_name: str) -> Union[Self, Never]: + def granule_name(self, granule_name: str) -> Self: """Find granules matching either granule ur or producer granule id, queries using the readable_granule_name metadata field. @@ -645,11 +667,11 @@ def granule_name(self, granule_name: str) -> Union[Self, Never]: Parameters: granule_name: granule name (accepts wildcards) - Raises: - TypeError: if `granule_name` is not of type `str` - Returns: self + + Raises: + TypeError: if `granule_name` is not of type `str` """ if not isinstance(granule_name, str): raise TypeError("granule_name must be of type string") @@ -659,65 +681,66 @@ def granule_name(self, granule_name: str) -> Union[Self, Never]: return self @override - def online_only(self, online_only: bool = True) -> Union[Self, Never]: + def online_only(self, online_only: bool = True) -> Self: """Only match granules that are listed online and not available for download. 
- The opposite of this method is downloadable(). + The inverse of this method is `downloadable`. Parameters: - online_only: True to require granules only be online - - Raises: - TypeError: if `online_only` is not of type `bool` + online_only: If `True`, obtain only granules that are online (not + downloadable) Returns: self + + Raises: + TypeError: `online_only` is not of type `bool`. """ return super().online_only(online_only) @override - def day_night_flag(self, day_night_flag: str) -> Union[Self, Never]: + def day_night_flag(self, day_night_flag: str) -> Self: """Filter by period of the day the granule was collected during. Parameters: day_night_flag: "day", "night", or "unspecified" - Raises: - TypeError: if `day_night_flag` is not of type `str` - ValueError: if `day_night_flag` is not one of `"day"`, `"night"`, or - `"unspecified"` - Returns: self + + Raises: + TypeError: `day_night_flag` is not of type `str`. + ValueError: `day_night_flag` is not one of `"day"`, `"night"`, or + `"unspecified"`. """ return super().day_night_flag(day_night_flag) @override - def instrument(self, instrument: str) -> Union[Self, Never]: + def instrument(self, instrument: str) -> Self: """Filter by the instrument associated with the granule. Parameters: instrument: name of the instrument - Raises: - ValueError: if `instrument` is not a non-empty string - Returns: self + + Raises: + ValueError: `instrument` is not a non-empty string. """ return super().instrument(instrument) @override - def platform(self, platform: str) -> Union[Self, Never]: + def platform(self, platform: str) -> Self: """Filter by the satellite platform the granule came from. Parameters: platform: name of the satellite - Raises: - ValueError: if `platform` is not a non-empty string - Returns: self + + Raises: + ValueError: `platform` is not a non-empty string. """ return super().platform(platform) @@ -726,19 +749,19 @@ def cloud_cover( self, min_cover: Optional[FloatLike] = 0, max_cover: Optional[FloatLike] = 100, - ) -> Union[Self, Never]: + ) -> Self: """Filter by the percentage of cloud cover present in the granule. Parameters: min_cover: minimum percentage of cloud cover max_cover: maximum percentage of cloud cover - Raises: - ValueError: if `min_cover` or `max_cover` is not convertible to a float, - or if `min_cover` is greater than `max_cover` - Returns: self + + Raises: + ValueError: `min_cover` or `max_cover` is not convertible to a float, + or `min_cover` is greater than `max_cover`. """ return super().cloud_cover(min_cover, max_cover) @@ -778,10 +801,11 @@ def short_name(self, short_name: str) -> Self: return super().short_name(short_name) def debug(self, debug: bool = True) -> Self: - """If True, prints the actual query to CMR, notice that the pagination happens in the headers. + """If True, prints the actual query to CMR, notice that the pagination happens + in the headers. Parameters: - debug: Print CMR query. + debug: If `True`, print the CMR query. Returns: self @@ -795,24 +819,26 @@ def temporal( date_from: Optional[Union[str, dt.datetime]] = None, date_to: Optional[Union[str, dt.datetime]] = None, exclude_boundary: bool = False, - ) -> Union[Self, Never]: + ) -> Self: """Filter by an open or closed date range. - Dates can be provided as a datetime objects or ISO 8601 formatted strings. Multiple - ranges can be provided by successive calls to this method before calling execute(). + + Dates can be provided as a datetime objects or ISO 8601 formatted strings. 
+ Multiple ranges can be provided by successive calls to this method before + calling execute(). Parameters: date_from: earliest date of temporal range date_to: latest date of temporal range exclude_boundary: whether to exclude the date_from/to in the matched range - Raises: - ValueError: if `date_from` or `date_to` is a non-`None` value that is - neither a datetime object nor a string that can be parsed as a datetime - object; or if `date_from` and `date_to` are both datetime objects (or - parsable as such) and `date_from` is greater than `date_to` - Returns: self + + Raises: + ValueError: `date_from` or `date_to` is a non-`None` value that is + neither a datetime object nor a string that can be parsed as a datetime + object; or `date_from` and `date_to` are both datetime objects (or + parsable as such) and `date_from` is after `date_to`. """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None and not isinstance(date_from, dt.datetime): @@ -845,36 +871,36 @@ def version(self, version: str) -> Self: return super().version(version) @override - def point(self, lon: FloatLike, lat: FloatLike) -> Union[Self, Never]: + def point(self, lon: FloatLike, lat: FloatLike) -> Self: """Filter by granules that include a geographic point. Parameters: - lon (String): longitude of geographic point - lat (String): latitude of geographic point - - Raises: - ValueError: if `lon` or `lat` cannot be converted to a float + lon: longitude of geographic point + lat: latitude of geographic point Returns: self + + Raises: + ValueError: `lon` or `lat` cannot be converted to a float. """ return super().point(lon, lat) @override - def polygon(self, coordinates: Sequence[PointLike]) -> Union[Self, Never]: - """Filter by granules that overlap a polygonal area. Must be used in combination with a - collection filtering parameter such as short_name or entry_title. + def polygon(self, coordinates: Sequence[PointLike]) -> Self: + """Filter by granules that overlap a polygonal area. Must be used in combination + with a collection filtering parameter such as short_name or entry_title. Parameters: coordinates: list of (lon, lat) tuples - Raises: - ValueError: if `coordinates` is not a sequence of at least 4 coordinate - pairs, any of the coordinates cannot be converted to a float, or the first - and last coordinate pairs are not equal - Returns: self + + Raises: + ValueError: `coordinates` is not a sequence of at least 4 coordinate + pairs, any of the coordinates cannot be converted to a float, or the + first and last coordinate pairs are not equal. """ return super().polygon(coordinates) @@ -885,9 +911,9 @@ def bounding_box( lower_left_lat: FloatLike, upper_right_lon: FloatLike, upper_right_lat: FloatLike, - ) -> Union[Self, Never]: - """Filter by granules that overlap a bounding box. Must be used in combination with - a collection filtering parameter such as short_name or entry_title. + ) -> Self: + """Filter by granules that overlap a bounding box. Must be used in combination + with a collection filtering parameter such as short_name or entry_title. Parameters: lower_left_lon: lower left longitude of the box @@ -895,51 +921,52 @@ def bounding_box( upper_right_lon: upper right longitude of the box upper_right_lat: upper right latitude of the box - Raises: - ValueError: if any of the coordinates cannot be converted to a float - Returns: self + + Raises: + ValueError: A coordinate could not be converted to a float. 
""" return super().bounding_box( lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat ) @override - def line(self, coordinates: Sequence[PointLike]) -> Union[Self, Never]: - """Filter by granules that overlap a series of connected points. Must be used in combination - with a collection filtering parameter such as short_name or entry_title. + def line(self, coordinates: Sequence[PointLike]) -> Self: + """Filter by granules that overlap a series of connected points. Must be used + in combination with a collection filtering parameter such as short_name or + entry_title. Parameters: coordinates: a list of (lon, lat) tuples - Raises: - ValueError: if `coordinates` is not a sequence of at least 2 coordinate - pairs, or any of the coordinates cannot be converted to a float - Returns: self + + Raises: + ValueError: `coordinates` is not a sequence of at least 2 coordinate + pairs, or a coordinate could not be converted to a float. """ return super().line(coordinates) @override - def downloadable(self, downloadable: bool = True) -> Union[Self, Never]: - """Only match granules that are available for download. The opposite of this - method is online_only(). + def downloadable(self, downloadable: bool = True) -> Self: + """Only match granules that are available for download. The inverse of this + method is `online_only`. Parameters: - downloadable: True to require granules be downloadable - - Raises: - TypeError: if `downloadable` is not of type `bool` + downloadable: If `True`, obtain only granules that are downloadable. Returns: self + + Raises: + TypeError: `downloadable` is not of type `bool`. """ return super().downloadable(downloadable) def doi(self, doi: str) -> Union[Self, Never]: - """Search data granules by DOI + """Search data granules by DOI. ???+ Tip Not all datasets have an associated DOI, internally if a DOI is found @@ -948,18 +975,26 @@ def doi(self, doi: str) -> Union[Self, Never]: Parameters: doi: DOI of a dataset, e.g. 10.5067/AQR50-3Q7CS - Raises: - RuntimeError: if the CMR query to get the collection for the DOI fails - Returns: self + + Raises: + RuntimeError: The CMR query to get the collection for the DOI fails. """ + + # TODO consider deferring this query until the search is executed collection = DataCollections().doi(doi).get() + + # TODO consider raising an exception when there are multiple collections, since + # we can't know which one the user wants, and choosing one is arbitrary. if len(collection) > 0: concept_id = collection[0].concept_id() self.params["concept_id"] = concept_id else: + # TODO consider removing this print statement since we don't print such + # a message in other cases where no results are found. Seems arbitrary. 
print( f"earthaccess couldn't find any associated collections with the DOI: {doi}" ) + return self From 3a887462ff35eff061db406ca4de0cfea8c3ed88 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sat, 13 Apr 2024 11:30:15 -0400 Subject: [PATCH 07/12] Remove use of Never type --- earthaccess/api.py | 10 ++++------ earthaccess/search.py | 13 ++++++------- earthaccess/typing_.py | 5 +---- stubs/cmr/queries.pyi | 40 ++++++++++++++++++---------------------- 4 files changed, 29 insertions(+), 39 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index bd5c100b..1384614e 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -8,7 +8,7 @@ from .results import DataCollection, DataGranule from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery from .store import Store -from .typing_ import Any, Dict, List, Never, Optional, Union +from .typing_ import Any, Dict, List, Optional, Union from .utils import _validation as validate @@ -27,9 +27,7 @@ def _normalize_location(location: Optional[str]) -> Optional[str]: return location -def search_datasets( - count: int = -1, **kwargs: Any -) -> Union[List[DataCollection], Never]: +def search_datasets(count: int = -1, **kwargs: Any) -> List[DataCollection]: """Search datasets using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -80,7 +78,7 @@ def search_datasets( return query.get_all() -def search_data(count: int = -1, **kwargs: Any) -> Union[List[DataGranule], Never]: +def search_data(count: int = -1, **kwargs: Any) -> List[DataGranule]: """Search dataset granules using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -166,7 +164,7 @@ def download( local_path: Optional[str], provider: Optional[str] = None, threads: int = 8, -) -> Union[List[str], Never]: +) -> List[str]: """Retrieves data granules from a remote storage system. * If we run this in the cloud, we will be using S3 to move data to `local_path`. diff --git a/earthaccess/search.py b/earthaccess/search.py index d8d5e8c7..98ea6b82 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -12,7 +12,6 @@ from .typing_ import ( Any, List, - Never, Optional, Self, Sequence, @@ -29,7 +28,7 @@ def get_results( query: Union[CollectionQuery, GranuleQuery], limit: int = 2000 -) -> Union[List[Any], Never]: +) -> List[Any]: """ Get all results up to some limit, even if spanning multiple pages. @@ -108,7 +107,7 @@ def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> No self.params["include_granule_counts"] = True @override - def hits(self) -> Union[int, Never]: + def hits(self) -> int: """Returns the number of hits the current query will return. This is done by making a lightweight query to CMR and inspecting the returned headers. Restricted datasets will always return zero results even if there are results. @@ -131,7 +130,7 @@ def hits(self) -> Union[int, Never]: return int(response.headers["CMR-Hits"]) @override - def get(self, limit: int = 2000) -> Union[List[DataCollection], Never]: + def get(self, limit: int = 2000) -> List[DataCollection]: """Get all the collections (datasets) that match with our current parameters up to some limit, even if spanning multiple pages. 
@@ -471,7 +470,7 @@ def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> No self._debug = False @override - def hits(self) -> Union[int, Never]: + def hits(self) -> int: """Returns the number of hits the current query will return. This is done by making a lightweight query to CMR and inspecting the returned headers. @@ -498,7 +497,7 @@ def hits(self) -> Union[int, Never]: return int(response.headers["CMR-Hits"]) @override - def get(self, limit: int = 2000) -> Union[List[DataGranule], Never]: + def get(self, limit: int = 2000) -> List[DataGranule]: """Get all the collections (datasets) that match with our current parameters up to some limit, even if spanning multiple pages. @@ -965,7 +964,7 @@ def downloadable(self, downloadable: bool = True) -> Self: """ return super().downloadable(downloadable) - def doi(self, doi: str) -> Union[Self, Never]: + def doi(self, doi: str) -> Self: """Search data granules by DOI. ???+ Tip diff --git a/earthaccess/typing_.py b/earthaccess/typing_.py index c8a3507f..3ed39f70 100644 --- a/earthaccess/typing_.py +++ b/earthaccess/typing_.py @@ -18,11 +18,9 @@ from typing import TypeAlias if sys.version_info < (3, 11): - from typing import NoReturn as Never - from typing_extensions import Self else: - from typing import Never, Self + from typing import Self if sys.version_info < (3, 12): from typing_extensions import override @@ -35,7 +33,6 @@ "Dict", "List", "Mapping", - "Never", "Optional", "Self", "Sequence", diff --git a/stubs/cmr/queries.pyi b/stubs/cmr/queries.pyi index 41d18b53..3b2fadc3 100644 --- a/stubs/cmr/queries.pyi +++ b/stubs/cmr/queries.pyi @@ -14,11 +14,9 @@ else: from typing import TypeAlias if sys.version_info < (3, 11): - from typing import NoReturn as Never - from typing_extensions import Self else: - from typing import Never, Self + from typing import Self CMR_OPS: str CMR_UAT: str @@ -34,15 +32,15 @@ class Query: headers: MutableMapping[str, str] def __init__(self, route: str, mode: str = ...) -> None: ... - def _build_url(self) -> Union[str, Never]: ... - def get(self, limit: int = ...) -> Union[List[Any], Never]: ... - def hits(self) -> Union[int, Never]: ... - def get_all(self) -> Union[List[Any], Never]: ... + def _build_url(self) -> str: ... + def get(self, limit: int = ...) -> List[Any]: ... + def hits(self) -> int: ... + def get_all(self) -> List[Any]: ... def parameters(self, **kwargs: Any) -> Self: ... - def format(self, output_format: str = "json") -> Union[Self, Never]: ... - def concept_id(self, ids: Sequence[str]) -> Union[Self, Never]: ... + def format(self, output_format: str = "json") -> Self: ... + def concept_id(self, ids: Sequence[str]) -> Self: ... def provider(self, provider: str) -> Self: ... - def mode(self, mode: str = ...) -> Union[None, Never]: ... + def mode(self, mode: str = ...) -> None: ... def token(self, token: str) -> Self: ... def bearer_token(self, bearer_token: str) -> Self: ... @@ -53,14 +51,12 @@ class GranuleCollectionBaseQuery(Query): date_from: Optional[Union[str, datetime]], date_to: Optional[Union[str, datetime]], exclude_boundary: bool = False, - ) -> Union[Self, Never]: ... + ) -> Self: ... def short_name(self, short_name: str) -> Self: ... def version(self, version: str) -> Self: ... def point(self, lon: FloatLike, lat: FloatLike) -> Self: ... - def circle( - self, lon: FloatLike, lat: FloatLike, dist: FloatLike - ) -> Union[Self, Never]: ... - def polygon(self, coordinates: Sequence[PointLike]) -> Union[Self, Never]: ... 
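(For context on the `typing_` module and the stub preamble above: the arrangement reduces to a small version gate, since `Self` joined `typing` in Python 3.11 and `override` in 3.12. Below is a condensed, illustrative sketch, assuming `typing_extensions` is installed, which a later commit in this series makes a direct dependency.)

```python
import sys

# Pull Self/override from the standard library when available, otherwise fall
# back to typing_extensions (Self: Python 3.11+, override: Python 3.12+).
if sys.version_info < (3, 11):
    from typing_extensions import Self
else:
    from typing import Self

if sys.version_info < (3, 12):
    from typing_extensions import override
else:
    from typing import override


class Query:
    def provider(self, provider: str) -> Self:
        self._provider = provider
        return self


class GranuleQuery(Query):
    @override  # a checker flags this if Query.provider is renamed or removed
    def provider(self, provider: str) -> Self:
        return super().provider(provider)
```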
+    def circle(self, lon: FloatLike, lat: FloatLike, dist: FloatLike) -> Self: ...
+    def polygon(self, coordinates: Sequence[PointLike]) -> Self: ...
     def bounding_box(
         self,
         lower_left_lon: FloatLike,
@@ -79,24 +75,24 @@ class GranuleQuery(GranuleCollectionBaseQuery):
         orbit1: FloatLike,
         orbit2: Optional[FloatLike] = ...,
     ) -> Self: ...
-    def day_night_flag(self, day_night_flag: str) -> Union[Self, Never]: ...
+    def day_night_flag(self, day_night_flag: str) -> Self: ...
     def cloud_cover(
         self,
         min_cover: Optional[FloatLike] = ...,
         max_cover: Optional[FloatLike] = ...,
     ) -> Self: ...
-    def instrument(self, instrument: str) -> Union[Self, Never]: ...
-    def platform(self, platform: str) -> Union[Self, Never]: ...
-    def sort_key(self, sort_key: str) -> Union[Self, Never]: ...
-    def granule_ur(self, granule_ur: str) -> Union[Self, Never]: ...
+    def instrument(self, instrument: str) -> Self: ...
+    def platform(self, platform: str) -> Self: ...
+    def sort_key(self, sort_key: str) -> Self: ...
+    def granule_ur(self, granule_ur: str) -> Self: ...

 class CollectionQuery(GranuleCollectionBaseQuery):
     def __init__(self, mode: str = ...) -> None: ...
     def archive_center(self, center: str) -> Self: ...
     def keyword(self, text: str) -> Self: ...
     def native_id(self, native_ids: Sequence[str]) -> Self: ...
-    def tool_concept_id(self, ids: Sequence[str]) -> Union[Self, Never]: ...
-    def service_concept_id(self, ids: Sequence[str]) -> Union[Self, Never]: ...
+    def tool_concept_id(self, ids: Sequence[str]) -> Self: ...
+    def service_concept_id(self, ids: Sequence[str]) -> Self: ...

 class ToolServiceVariableBaseQuery(Query):
     def native_id(self, native_ids: Sequence[str]) -> Self: ...

From 4a6586f291d4c3bc8bbffe5f3f6e054d7a6bfe12 Mon Sep 17 00:00:00 2001
From: Chuck Daniels
Date: Mon, 15 Apr 2024 09:40:00 -0400
Subject: [PATCH 08/12] Restore use of session for CMR paged queries

Use of the session during CMR paged queries was accidentally removed
with the introduction of Search-After functionality.
---
 earthaccess/search.py      | 10 ++++++----
 tests/unit/test_results.py |  2 ++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/earthaccess/search.py b/earthaccess/search.py
index 98ea6b82..1c378d63 100644
--- a/earthaccess/search.py
+++ b/earthaccess/search.py
@@ -27,7 +27,9 @@


 def get_results(
-    query: Union[CollectionQuery, GranuleQuery], limit: int = 2000
+    session: requests.Session,
+    query: Union[CollectionQuery, GranuleQuery],
+    limit: int = 2000,
 ) -> List[Any]:
     """
     Get all results up to some limit, even if spanning multiple pages.
@@ -55,7 +57,7 @@ def get_results(
     headers = dict(query.headers or {})

     while more_results:
-        response = requests.get(url, headers=headers, params={"page_size": page_size})
+        response = session.get(url, headers=headers, params={"page_size": page_size})

         if cmr_search_after := response.headers.get("cmr-search-after"):
             headers["cmr-search-after"] = cmr_search_after
@@ -152,7 +154,7 @@ def get(self, limit: int = 2000) -> List[DataCollection]:

         return [
             DataCollection(collection, self._fields)
-            for collection in get_results(self, limit)
+            for collection in get_results(self.session, self, limit)
         ]

     @override
@@ -516,7 +518,7 @@ def get(self, limit: int = 2000) -> List[DataGranule]:

         Raises:
             RuntimeError: The CMR query failed.
""" - response = get_results(self, limit) + response = get_results(self.session, self, limit) cloud = self._is_cloud_hosted(response[0]) diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 6ea5071e..43cdaea1 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -2,6 +2,7 @@ import earthaccess from earthaccess.search import DataCollections + from vcr.unittest import VCRTestCase # type: ignore[import-untyped] logging.basicConfig() @@ -37,6 +38,7 @@ def _get_vcr(self, **kwargs): myvcr.filter_headers = [ "Accept-Encoding", "Authorization", + "Cookie", "Set-Cookie", "User-Agent", ] From 4f2410987394941f55013749d81b24cb5f30a47b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 13:46:38 +0000 Subject: [PATCH 09/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit/test_results.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 43cdaea1..cc2f8d3a 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -2,7 +2,6 @@ import earthaccess from earthaccess.search import DataCollections - from vcr.unittest import VCRTestCase # type: ignore[import-untyped] logging.basicConfig() From 427cba72d0085500e7dfb5806446c82f4db42691 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Tue, 16 Apr 2024 16:08:03 -0400 Subject: [PATCH 10/12] Add typing_extensions as direct dependency --- ci/environment-mindeps.yaml | 1 + earthaccess/typing_.py | 46 ------------------------------------- poetry.lock | 2 +- pyproject.toml | 1 + 4 files changed, 3 insertions(+), 47 deletions(-) delete mode 100644 earthaccess/typing_.py diff --git a/ci/environment-mindeps.yaml b/ci/environment-mindeps.yaml index 3416a5f6..cb8fd367 100644 --- a/ci/environment-mindeps.yaml +++ b/ci/environment-mindeps.yaml @@ -17,6 +17,7 @@ dependencies: - multimethod=1.8 - python-dateutil=2.8.2 - importlib-resources=6.3.2 + - typing-extensions=4.10.0 # test dependencies - responses - pytest diff --git a/earthaccess/typing_.py b/earthaccess/typing_.py deleted file mode 100644 index 3ed39f70..00000000 --- a/earthaccess/typing_.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Convenience module for importing types from the typing module, abstracting away -the differences between Python versions. 
-""" - -import sys -from typing import Any, Callable, Optional, SupportsFloat, Type, Union, cast - -if sys.version_info < (3, 9): - from typing import Dict, List, Mapping, Sequence, Tuple -else: - from builtins import dict as Dict, list as List, tuple as Tuple - from collections.abc import Mapping, Sequence - -if sys.version_info < (3, 10): - from typing_extensions import TypeAlias -else: - from typing import TypeAlias - -if sys.version_info < (3, 11): - from typing_extensions import Self -else: - from typing import Self - -if sys.version_info < (3, 12): - from typing_extensions import override -else: - from typing import override - -__all__ = [ - "Any", - "Callable", - "Dict", - "List", - "Mapping", - "Optional", - "Self", - "Sequence", - "SupportsFloat", - "Tuple", - "Type", - "TypeAlias", - "Union", - "cast", - "override", -] diff --git a/poetry.lock b/poetry.lock index 04267629..f8c4eba9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5120,4 +5120,4 @@ kerchunk = ["dask", "kerchunk"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "9f38ee827a93f83af0b7b666499b649afa30d6c18e153fea048e0140dbe1e7eb" +content-hash = "530a3cffb6d044e431ec3671268949e797d3c468c0f653b6fea7c90cdc422b3d" diff --git a/pyproject.toml b/pyproject.toml index 14498143..dbe917ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ python-dateutil = ">=2.8.2" kerchunk = { version = ">=0.1.2", optional = true } dask = { version = ">=2022.1.0", optional = true } importlib-resources = ">=6.3.2" +typing_extensions = ">=4.10.0" [tool.poetry.extras] kerchunk = ["kerchunk", "dask"] From ef83a7f75b502aa6f6ab7a46fec2fe8b2f7c988b Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Tue, 16 Apr 2024 16:09:09 -0400 Subject: [PATCH 11/12] Use typing_extensions as unified source of types --- earthaccess/api.py | 2 +- earthaccess/search.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index 1384614e..796dbcb5 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -1,6 +1,7 @@ import requests import s3fs from fsspec import AbstractFileSystem +from typing_extensions import Any, Dict, List, Optional, Union import earthaccess @@ -8,7 +9,6 @@ from .results import DataCollection, DataGranule from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery from .store import Store -from .typing_ import Any, Dict, List, Optional, Union from .utils import _validation as validate diff --git a/earthaccess/search.py b/earthaccess/search.py index 1c378d63..fadd7b55 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -3,13 +3,7 @@ import dateutil.parser as parser import requests - -from cmr import CollectionQuery, GranuleQuery - -from .auth import Auth -from .daac import find_provider, find_provider_by_shortname -from .results import DataCollection, DataGranule -from .typing_ import ( +from typing_extensions import ( Any, List, Optional, @@ -22,6 +16,12 @@ override, ) +from cmr import CollectionQuery, GranuleQuery + +from .auth import Auth +from .daac import find_provider, find_provider_by_shortname +from .results import DataCollection, DataGranule + FloatLike: TypeAlias = Union[str, SupportsFloat] PointLike: TypeAlias = Tuple[FloatLike, FloatLike] From dc519f46db5e08e03d75e1008346a29fbd6d36e4 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Tue, 16 Apr 2024 16:09:54 -0400 Subject: [PATCH 12/12] Add brief explanation of type stubs --- CONTRIBUTING.md | 9 +++++++++ 1 file changed, 9 insertions(+) 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 300b5387..a8415a1a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -84,6 +84,15 @@ Finally, for _development dependencies only_, you must add an entry to
 make format lint
 ```

+We attempt to provide comprehensive type annotations within this repository. If
+you do not provide fully annotated functions or methods, the `lint` command will
+fail. Over time, we plan to increase type-checking strictness in order to
+ensure more precise, beneficial type annotations.
+
+We have included type stubs for the untyped `python-cmr` library, which we
+intend to eventually upstream. Since `python-cmr` exposes the `cmr` package,
+the stubs appear under `stubs/cmr`.
+
 ### Requirements to merge code (Pull Request Process)

 - you must include test coverage
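
Note: as a rough illustration of what these stubs enable (this snippet is not part of the patch series, and the product short name and version below are made-up placeholder values), a type checker such as mypy can now verify chained `python-cmr` calls like the ones used in `earthaccess/search.py`:

```python
# Illustrative sketch only, not part of the patches above.
# The short name and version are hypothetical placeholder values.
from cmr import GranuleQuery

# Builder methods return Self in the stubs, so chaining type-checks cleanly.
query = GranuleQuery().short_name("EXAMPLE_PRODUCT").version("1")

# Per stubs/cmr/queries.pyi, hits() returns int and get() returns List[Any],
# so these annotations are verified statically (the calls hit CMR at runtime).
hit_count: int = query.hits()
granules = query.get(limit=10)
```

Because the stubs only describe the existing `cmr` package to the type checker, they do not change the runtime behavior of `python-cmr`; they simply let mypy catch mismatched CMR query usage before it reaches CI.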