From 9aea057578f6051e2f266ea50e3b7691a3a9e4ad Mon Sep 17 00:00:00 2001
From: danielfromearth
Date: Wed, 7 Feb 2024 15:55:04 -0500
Subject: [PATCH] further docstring cleanup

---
 earthaccess/api.py                 | 12 +++++++-----
 earthaccess/auth.py                | 23 ++++++++++++-----------
 earthaccess/daac.py                |  2 +-
 earthaccess/kerchunk.py            | 12 ++++++------
 earthaccess/results.py             |  4 ++--
 earthaccess/search.py              | 14 ++++++++------
 earthaccess/store.py               |  6 +++---
 tests/integration/test_kerchunk.py | 20 ++++++++++----------
 8 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/earthaccess/api.py b/earthaccess/api.py
index 9177b37c..9a6422a8 100644
--- a/earthaccess/api.py
+++ b/earthaccess/api.py
@@ -129,7 +129,7 @@ def login(strategy: str = "all", persist: bool = False) -> Auth:

     Parameters:
         strategy:
-            authentication method.
+            An authentication method.

             * **"all"**: (default) try all methods until one works
             * **"interactive"**: enter username and password.
@@ -138,7 +138,7 @@ def login(strategy: str = "all", persist: bool = False) -> Auth:
         persist: will persist credentials in a .netrc file

     Returns:
-        an instance of Auth.
+        An instance of Auth.
     """
     if strategy == "all":
         for strategy in ["environment", "netrc", "interactive"]:
@@ -166,8 +166,9 @@ def download(
 ) -> List[str]:
     """Retrieves data granules from a remote storage system.

-    * If we run this in the cloud, we will be using S3 to move data to `local_path`
-    * If we run it outside AWS (us-west-2 region) and the dataset is cloud hostes we'll use HTTP links
+    * If we run this in the cloud, we will be using S3 to move data to `local_path`.
+    * If we run it outside AWS (us-west-2 region) and the dataset is cloud hosted,
+      we'll use HTTP links.

     Parameters:
         granules: a granule, list of granules, a granule link (HTTP), or a list of granule links (HTTP)
@@ -319,7 +320,8 @@ def get_s3fs_session(
         daac: Any DAAC short name e.g. NSIDC, GES_DISC
         provider: Each DAAC can have a cloud provider. If the DAAC is specified,
             there is no need to use provider.
-        results: A list of results from search_data(), earthaccess will use the metadata form CMR to obtain the S3 Endpoint
+        results: A list of results from search_data().
+            `earthaccess` will use the metadata from CMR to obtain the S3 Endpoint.

     Returns:
         an authenticated s3fs session valid for 1 hour
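A quick usage sketch of the two api.py entry points touched above, for reviewers; this is illustrative only and not part of the diff (the "ATL06" short name is just an example dataset):

    import earthaccess

    # "all" tries $EARTHDATA_USERNAME/$EARTHDATA_PASSWORD, then ~/.netrc,
    # then an interactive prompt, stopping at the first strategy that works.
    auth = earthaccess.login(strategy="all")

    granules = earthaccess.search_data(short_name="ATL06", count=2)

    # Inside us-west-2 this moves data over S3; outside AWS it falls back
    # to HTTPS links, as the download() docstring now says.
    files = earthaccess.download(granules, local_path="./data")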
diff --git a/earthaccess/auth.py b/earthaccess/auth.py
index 9b94e8d5..004efa7c 100644
--- a/earthaccess/auth.py
+++ b/earthaccess/auth.py
@@ -65,15 +65,16 @@ def login(self, strategy: str = "netrc", persist: bool = False) -> Any:

         Parameters:
             strategy:
-                authentication method.
+                The authentication method.

-                * **"interactive"**: enter username and password.
-                * **"netrc"**: (default) retrieve username and password from ~/.netrc.
-                * **"environment"**: retrieve username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD.
-            persist: will persist credentials in a .netrc file
+                * **"interactive"**: Enter a username and password.
+                * **"netrc"**: (default) Retrieve a username and password from ~/.netrc.
+                * **"environment"**:
+                    Retrieve a username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD.
+            persist: Will persist credentials in a `.netrc` file.

         Returns:
-            an instance of Auth.
+            An instance of Auth.
         """
         if self.authenticated:
             logger.debug("We are already authenticated with NASA EDL")
@@ -143,16 +144,16 @@ def get_s3_credentials(
         provider: Optional[str] = None,
         endpoint: Optional[str] = None,
     ) -> Dict[str, str]:
-        """Gets AWS S3 credentials for a given NASA cloud provider. The
-        easier way is to use the DAAC short name. provider is optional if we know it.
+        """Gets AWS S3 credentials for a given NASA cloud provider.
+        The easier way is to use the DAAC short name; provider is optional if we know it.

         Parameters:
-            daac: the name of a NASA DAAC, i.e. NSIDC or PODAAC
+            daac: The name of a NASA DAAC, e.g. NSIDC or PODAAC.
             provider: A valid cloud provider. Each DAAC has a provider code for their cloud distributions.
-            endpoint: getting the credentials directly from the S3Credentials URL
+            endpoint: Getting the credentials directly from the S3Credentials URL.

         Returns:
-            A Python dictionary with the temporary AWS S3 credentials
+            A Python dictionary with the temporary AWS S3 credentials.
         """
         if self.authenticated:
             session = SessionWithHeaderRedirection(self.username, self.password)
diff --git a/earthaccess/daac.py b/earthaccess/daac.py
index a15972c1..4f0e99f3 100644
--- a/earthaccess/daac.py
+++ b/earthaccess/daac.py
@@ -128,7 +128,7 @@ def find_provider(
         if len(daac["cloud-providers"]) > 0:
             return daac["cloud-providers"][0]
         else:
-            # We found the DAAC but it does not have cloud data
+            # We found the DAAC, but it does not have cloud data
             return daac["on-prem-providers"][0]
     else:
         # return on prem provider code
diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py
index eb3f4cae..02533a0e 100644
--- a/earthaccess/kerchunk.py
+++ b/earthaccess/kerchunk.py
@@ -7,14 +7,14 @@


 def _get_chunk_metadata(
-    granuale: earthaccess.results.DataGranule,
+    granule: earthaccess.results.DataGranule,
     fs: fsspec.AbstractFileSystem | s3fs.S3FileSystem,
 ) -> list[dict]:
     from kerchunk.hdf import SingleHdf5ToZarr

     metadata = []
     access = "direct" if isinstance(fs, s3fs.S3FileSystem) else "indirect"
-    for url in granuale.data_links(access=access):
+    for url in granule.data_links(access=access):
         with fs.open(url) as inf:
             h5chunks = SingleHdf5ToZarr(inf, url)
             m = h5chunks.translate()
@@ -23,7 +23,7 @@ def _get_chunk_metadata(


 def consolidate_metadata(
-    granuales: list[earthaccess.results.DataGranule],
+    granules: list[earthaccess.results.DataGranule],
     kerchunk_options: dict | None = None,
     access: str = "direct",
     outfile: str | None = None,
@@ -39,13 +39,13 @@ def consolidate_metadata(
         ) from e

     if access == "direct":
-        fs = earthaccess.get_s3fs_session(provider=granuales[0]["meta"]["provider-id"])
+        fs = earthaccess.get_s3fs_session(provider=granules[0]["meta"]["provider-id"])
     else:
         fs = earthaccess.get_fsspec_https_session()

-    # Get metadata for each granuale
+    # Get metadata for each granule
     get_chunk_metadata = dask.delayed(_get_chunk_metadata)
-    chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granuales])
+    chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules])
     chunks = sum(chunks, start=[])

     # Get combined metadata object
diff --git a/earthaccess/results.py b/earthaccess/results.py
index 4861f781..3faaf8fc 100644
--- a/earthaccess/results.py
+++ b/earthaccess/results.py
@@ -284,11 +284,11 @@ def _derive_s3_link(self, links: List[str]) -> List[str]:
     def data_links(
         self, access: Optional[str] = None, in_region: bool = False
     ) -> List[str]:
-        """Returns the data links form a granule
+        """Returns the data links from a granule.

         Parameters:
             access: direct or external.
-                direct means in-region access for cloud-hosted collections.
+                Direct means in-region access for cloud-hosted collections.
             in_region: True if we are running in us-west-2.
                 It is meant for the store class.
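For reviewers, a sketch of the two APIs whose docstrings change above (get_s3_credentials via the top-level helper, and DataGranule.data_links); illustrative only, with an example DAAC and dataset:

    import earthaccess

    earthaccess.login(strategy="netrc")

    # Temporary S3 credentials, valid for about an hour; the DAAC short
    # name is usually enough and the provider code can be omitted.
    creds = earthaccess.get_s3_credentials(daac="PODAAC")

    granules = earthaccess.search_data(short_name="ATL06", count=1)
    # "direct" returns s3:// links for in-region use; "external" returns HTTPS links.
    print(granules[0].data_links(access="direct"))
    print(granules[0].data_links(access="external"))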
diff --git a/earthaccess/search.py b/earthaccess/search.py
index 15f0b49f..e8074368 100644
--- a/earthaccess/search.py
+++ b/earthaccess/search.py
@@ -273,7 +273,7 @@ def temporal(
         self, date_from: str, date_to: str, exclude_boundary: bool = False
     ) -> Type[CollectionQuery]:
         """Filter by an open or closed date range. Dates can be provided as datetime objects
-        or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls.
+        or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls
         to this method before calling execute().

         Parameters:
@@ -382,12 +382,14 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]:
         return self

     def provider(self, provider: str = "") -> Type[CollectionQuery]:
-        """Only match collections from a given provider, a NASA datacenter or DAAC can have 1 or more providers
-        i.e. PODAAC is a data center or DAAC, PODAAC is the default provider for on prem data, POCLOUD is
+        """Only match collections from a given provider.
+        A NASA datacenter or DAAC can have one or more providers.
+        For example, PODAAC is a data center or DAAC,
+        PODAAC is the default provider for on-prem data, and POCLOUD is
         the PODAAC provider for their data in the cloud.

         Parameters:
-            provider: a provider code for any DAAC. e.g. POCLOUD, NSIDC_CPRD, etc.
+            provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc.
         """
         self.params["provider"] = provider
         return self
@@ -452,8 +454,8 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]:
         queries using the readable_granule_name metadata field.

         ???+ Tip
-            We can use wirldcards on a granule name to further refine our search
-            i.e. MODGRNLD.*.daily.*
+            We can use wildcards on a granule name to further refine our search,
+            e.g. MODGRNLD.*.daily.*

         Parameters:
             granule_name: granule name (accepts wildcards)
diff --git a/earthaccess/store.py b/earthaccess/store.py
index 1a615631..e20ef10e 100644
--- a/earthaccess/store.py
+++ b/earthaccess/store.py
@@ -106,7 +106,7 @@ def __init__(self, auth: Any, pre_authorize: bool = False) -> None:
             self._requests_cookies: Dict[str, Any] = {}
             self.set_requests_session(oauth_profile)
             if pre_authorize:
-                # collect cookies from other daacs
+                # collect cookies from other DAACs
                 for url in DAAC_TEST_URLS:
                     self.set_requests_session(url)

@@ -348,7 +348,7 @@ def _open_granules(
         if granules[0].cloud_hosted:
             access = "direct"
             provider = granules[0]["meta"]["provider-id"]
-            # if the data has its own S3 credentials endpoint we'll use it
+            # if the data has its own S3 credentials endpoint, we will use it
             endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"])
             if endpoint is not None:
                 print(f"using endpoint: {endpoint}")
@@ -473,7 +473,7 @@ def get(
             files = self._get(granules, local_path, provider, threads)
             return files
         else:
-            raise ValueError("List of URLs or DataGranule isntances expected")
+            raise ValueError("List of URLs or DataGranule instances expected")

     @singledispatchmethod
     def _get(
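The granule_name docstring above explains wildcard matching; a short sketch of how a reviewer might exercise it through the top-level search helper (the MODGRNLD pattern is the docstring's own example; routing it through search_data is an assumption of this sketch):

    import earthaccess

    # Wildcards are matched against the readable_granule_name field,
    # here narrowing a collection to its daily granules.
    results = earthaccess.search_data(
        short_name="MODGRNLD",
        granule_name="MODGRNLD.*.daily.*",
        count=10,
    )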
diff --git a/tests/integration/test_kerchunk.py b/tests/integration/test_kerchunk.py
index 39c95e99..58f93077 100644
--- a/tests/integration/test_kerchunk.py
+++ b/tests/integration/test_kerchunk.py
@@ -20,21 +20,21 @@


 @pytest.fixture(scope="module")
-def granuales():
-    granuales = earthaccess.search_data(
+def granules():
+    granules = earthaccess.search_data(
         count=2,
         short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205",
         cloud_hosted=True,
     )
-    return granuales
+    return granules


 @pytest.mark.parametrize("protocol", ["", "file://"])
-def test_consolidate_metadata_outfile(tmp_path, granuales, protocol):
+def test_consolidate_metadata_outfile(tmp_path, granules, protocol):
     outfile = f"{protocol}{tmp_path / 'metadata.json'}"
     assert not os.path.exists(outfile)
     result = earthaccess.consolidate_metadata(
-        granuales,
+        granules,
         outfile=outfile,
         access="indirect",
         kerchunk_options={"concat_dims": "Time"},
@@ -43,9 +43,9 @@ def test_consolidate_metadata_outfile(tmp_path, granuales, protocol):
     assert result == outfile


-def test_consolidate_metadata_memory(tmp_path, granuales):
+def test_consolidate_metadata_memory(tmp_path, granules):
     result = earthaccess.consolidate_metadata(
-        granuales,
+        granules,
         access="indirect",
         kerchunk_options={"concat_dims": "Time"},
     )
@@ -54,10 +54,10 @@ def test_consolidate_metadata_memory(tmp_path, granuales):


 @pytest.mark.parametrize("output", ["file", "memory"])
-def test_consolidate_metadata(tmp_path, granuales, output):
+def test_consolidate_metadata(tmp_path, granules, output):
     xr = pytest.importorskip("xarray")
     # Open directly with `earthaccess.open`
-    expected = xr.open_mfdataset(earthaccess.open(granuales))
+    expected = xr.open_mfdataset(earthaccess.open(granules))

     # Open with kerchunk consolidated metadata file
     if output == "file":
@@ -65,7 +65,7 @@ def test_consolidate_metadata(tmp_path, granuales, output):
     else:
         kwargs = {}
     metadata = earthaccess.consolidate_metadata(
-        granuales, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs
+        granules, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs
     )

     fs = earthaccess.get_fsspec_https_session()
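The renamed fixture drives consolidate_metadata the same way a user would; for reviewers, a condensed sketch of that flow outside pytest, mirroring both output modes of the tests above (illustrative, not part of the diff):

    import earthaccess

    earthaccess.login()

    granules = earthaccess.search_data(
        count=2,
        short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205",
        cloud_hosted=True,
    )

    # In-memory consolidated references, as in test_consolidate_metadata_memory
    metadata = earthaccess.consolidate_metadata(
        granules, access="indirect", kerchunk_options={"concat_dims": "Time"}
    )

    # Or written to a JSON reference file, as in test_consolidate_metadata_outfile
    outfile = earthaccess.consolidate_metadata(
        granules,
        outfile="metadata.json",
        access="indirect",
        kerchunk_options={"concat_dims": "Time"},
    )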