further docstring cleanup
danielfromearth committed Feb 7, 2024
1 parent 8b6e58b commit 9aea057
Showing 8 changed files with 49 additions and 44 deletions.
earthaccess/api.py (12 changes: 7 additions & 5 deletions)

@@ -129,7 +129,7 @@ def login(strategy: str = "all", persist: bool = False) -> Auth:
Parameters:
strategy:
- authentication method.
+ An authentication method.
* **"all"**: (default) try all methods until one works
* **"interactive"**: enter username and password.
@@ -138,7 +138,7 @@ def login(strategy: str = "all", persist: bool = False) -> Auth:
persist: will persist credentials in a .netrc file
Returns:
- an instance of Auth.
+ An instance of Auth.
"""
if strategy == "all":
for strategy in ["environment", "netrc", "interactive"]:
@@ -166,8 +166,9 @@ def download(
) -> List[str]:
"""Retrieves data granules from a remote storage system.
- * If we run this in the cloud, we will be using S3 to move data to `local_path`
- * If we run it outside AWS (us-west-2 region) and the dataset is cloud hostes we'll use HTTP links
+ * If we run this in the cloud, we will be using S3 to move data to `local_path`.
+ * If we run it outside AWS (us-west-2 region) and the dataset is cloud hosted,
+   we'll use HTTP links.
Parameters:
granules: a granule, list of granules, a granule link (HTTP), or a list of granule links (HTTP)
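A hedged usage sketch of `download` based on the behavior described above; `short_name="ATL06"` and `local_path="./data"` are placeholder values:

```python
import earthaccess

earthaccess.login()

# Placeholder query; any search_data() result works here.
granules = earthaccess.search_data(short_name="ATL06", count=10)

# In AWS us-west-2 this moves data via S3; elsewhere it falls back to HTTP links.
files = earthaccess.download(granules, local_path="./data")
```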
@@ -319,7 +320,8 @@ def get_s3fs_session(
daac: Any DAAC short name e.g. NSIDC, GES_DISC
provider: Each DAAC can have a cloud provider.
If the DAAC is specified, there is no need to use provider.
- results: A list of results from search_data(), earthaccess will use the metadata form CMR to obtain the S3 Endpoint
+ results: A list of results from search_data().
+   `earthaccess` will use the metadata from CMR to obtain the S3 Endpoint.
Returns:
an authenticated s3fs session valid for 1 hour
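A sketch of the two ways to obtain the session that this docstring describes; note the one-hour validity mentioned above, and treat the query as a placeholder:

```python
import earthaccess

earthaccess.login()

# Option 1: name the DAAC directly.
fs = earthaccess.get_s3fs_session(daac="NSIDC")

# Option 2: let earthaccess derive the S3 endpoint from search results' CMR metadata.
results = earthaccess.search_data(short_name="ATL06", cloud_hosted=True, count=1)
fs = earthaccess.get_s3fs_session(results=results)
```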
earthaccess/auth.py (23 changes: 12 additions & 11 deletions)

@@ -65,15 +65,16 @@ def login(self, strategy: str = "netrc", persist: bool = False) -> Any:
Parameters:
strategy:
- authentication method.
+ The authentication method.
- * **"interactive"**: enter username and password.
- * **"netrc"**: (default) retrieve username and password from ~/.netrc.
- * **"environment"**: retrieve username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD.
- persist: will persist credentials in a .netrc file
+ * **"interactive"**: Enter a username and password.
+ * **"netrc"**: (default) Retrieve a username and password from ~/.netrc.
+ * **"environment"**:
+   Retrieve a username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD.
+ persist: Will persist credentials in a `.netrc` file.
Returns:
- an instance of Auth.
+ An instance of Auth.
"""
if self.authenticated:
logger.debug("We are already authenticated with NASA EDL")
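Stepping out of the diff for a moment, a sketch of the "environment" strategy documented above; the variable values are placeholders, and normally you would export them in your shell rather than set them in code:

```python
import os
import earthaccess

os.environ["EARTHDATA_USERNAME"] = "my_username"  # placeholder
os.environ["EARTHDATA_PASSWORD"] = "my_password"  # placeholder

auth = earthaccess.login(strategy="environment")
print(auth.authenticated)
```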
@@ -143,16 +144,16 @@ def get_s3_credentials(
provider: Optional[str] = None,
endpoint: Optional[str] = None,
) -> Dict[str, str]:
"""Gets AWS S3 credentials for a given NASA cloud provider. The
easier way is to use the DAAC short name. provider is optional if we know it.
"""Gets AWS S3 credentials for a given NASA cloud provider.
The easier way is to use the DAAC short name; provider is optional if we know it.
Parameters:
- daac: the name of a NASA DAAC, i.e. NSIDC or PODAAC
+ daac: The name of a NASA DAAC, e.g. NSIDC or PODAAC.
provider: A valid cloud provider. Each DAAC has a provider code for their cloud distributions.
- endpoint: getting the credentials directly from the S3Credentials URL
+ endpoint: Getting the credentials directly from the S3Credentials URL.
Returns:
- A Python dictionary with the temporary AWS S3 credentials
+ A Python dictionary with the temporary AWS S3 credentials.
"""
if self.authenticated:
session = SessionWithHeaderRedirection(self.username, self.password)
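A minimal sketch of the "easier way" this docstring mentions, using a DAAC short name; the exact keys in the returned dictionary may vary by version:

```python
import earthaccess

auth = earthaccess.login()

# Temporary AWS S3 credentials for PODAAC's cloud holdings.
creds = auth.get_s3_credentials(daac="PODAAC")
print(sorted(creds))  # inspect the returned credential fields
```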
earthaccess/daac.py (2 changes: 1 addition & 1 deletion)

@@ -128,7 +128,7 @@ def find_provider(
if len(daac["cloud-providers"]) > 0:
return daac["cloud-providers"][0]
else:
- # We found the DAAC but it does not have cloud data
+ # We found the DAAC, but it does not have cloud data
return daac["on-prem-providers"][0]
else:
# return on prem provider code
earthaccess/kerchunk.py (12 changes: 6 additions & 6 deletions)

@@ -7,14 +7,14 @@


def _get_chunk_metadata(
- granuale: earthaccess.results.DataGranule,
+ granule: earthaccess.results.DataGranule,
fs: fsspec.AbstractFileSystem | s3fs.S3FileSystem,
) -> list[dict]:
from kerchunk.hdf import SingleHdf5ToZarr

metadata = []
access = "direct" if isinstance(fs, s3fs.S3FileSystem) else "indirect"
- for url in granuale.data_links(access=access):
+ for url in granule.data_links(access=access):
with fs.open(url) as inf:
h5chunks = SingleHdf5ToZarr(inf, url)
m = h5chunks.translate()
@@ -23,7 +23,7 @@ def _get_chunk_metadata(


def consolidate_metadata(
- granuales: list[earthaccess.results.DataGranule],
+ granules: list[earthaccess.results.DataGranule],
kerchunk_options: dict | None = None,
access: str = "direct",
outfile: str | None = None,
@@ -39,13 +39,13 @@
) from e

if access == "direct":
- fs = earthaccess.get_s3fs_session(provider=granuales[0]["meta"]["provider-id"])
+ fs = earthaccess.get_s3fs_session(provider=granules[0]["meta"]["provider-id"])
else:
fs = earthaccess.get_fsspec_https_session()

- # Get metadata for each granuale
+ # Get metadata for each granule
get_chunk_metadata = dask.delayed(_get_chunk_metadata)
- chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granuales])
+ chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules])
chunks = sum(chunks, start=[])

# Get combined metadata object
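A usage sketch mirroring the integration tests changed later in this commit (requires the kerchunk and dask extras to be installed):

```python
import earthaccess

earthaccess.login()
granules = earthaccess.search_data(
    count=2,
    short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205",
    cloud_hosted=True,
)

# One consolidated kerchunk reference file for all granules, fetched over HTTPS.
metadata = earthaccess.consolidate_metadata(
    granules,
    outfile="metadata.json",
    access="indirect",
    kerchunk_options={"concat_dims": "Time"},
)
```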
earthaccess/results.py (4 changes: 2 additions & 2 deletions)

@@ -284,11 +284,11 @@ def _derive_s3_link(self, links: List[str]) -> List[str]:
def data_links(
self, access: Optional[str] = None, in_region: bool = False
) -> List[str]:
"""Returns the data links form a granule
"""Returns the data links from a granule.
Parameters:
access: direct or external.
- direct means in-region access for cloud-hosted collections.
+ Direct means in-region access for cloud-hosted collections.
in_region: True if we are running in us-west-2.
It is meant for the store class.
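A short sketch of the two access modes this docstring names; the query is a placeholder:

```python
import earthaccess

earthaccess.login()
granule = earthaccess.search_data(short_name="ATL06", cloud_hosted=True, count=1)[0]

s3_urls = granule.data_links(access="direct")       # in-region S3 URLs
https_urls = granule.data_links(access="external")  # external HTTPS URLs
```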
earthaccess/search.py (14 changes: 8 additions & 6 deletions)

@@ -273,7 +273,7 @@ def temporal(
self, date_from: str, date_to: str, exclude_boundary: bool = False
) -> Type[CollectionQuery]:
"""Filter by an open or closed date range. Dates can be provided as datetime objects
- or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls.
+ or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls
to this method before calling execute().
Parameters:
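Outside the diff, a hedged sketch of chaining successive `temporal()` calls, assuming the chainable `DataGranules` query class that earthaccess exposes and a placeholder collection:

```python
from earthaccess import DataGranules

# Two date ranges accumulate across successive calls before the query executes.
query = (
    DataGranules()
    .short_name("ATL06")  # placeholder collection
    .temporal("2020-01-01", "2020-03-31")
    .temporal("2021-01-01", "2021-03-31")
)
```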
@@ -382,12 +382,14 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]:
return self

def provider(self, provider: str = "") -> Type[CollectionQuery]:
"""Only match collections from a given provider, a NASA datacenter or DAAC can have 1 or more providers
i.e. PODAAC is a data center or DAAC, PODAAC is the default provider for on prem data, POCLOUD is
"""Only match collections from a given provider.
A NASA datacenter or DAAC can have one or more providers.
For example, PODAAC is a data center or DAAC,
PODAAC is the default provider for on-prem data, and POCLOUD is
the PODAAC provider for their data in the cloud.
Parameters:
- provider: a provider code for any DAAC. e.g. POCLOUD, NSIDC_CPRD, etc.
+ provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc.
"""
self.params["provider"] = provider
return self
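A sketch of the on-prem vs. cloud provider distinction described above, assuming the chainable `DataCollections` query class:

```python
from earthaccess import DataCollections

on_prem = DataCollections().provider("PODAAC")  # PO.DAAC on-prem holdings
cloud = DataCollections().provider("POCLOUD")   # the same DAAC's cloud holdings
```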
@@ -452,8 +454,8 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]:
queries using the readable_granule_name metadata field.
???+ Tip
- We can use wirldcards on a granule name to further refine our search
- i.e. MODGRNLD.*.daily.*
+ We can use wildcards on a granule name to further refine our search,
+ e.g. MODGRNLD.*.daily.*
Parameters:
granule_name: granule name (accepts wildcards)
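A sketch of the wildcard tip above, again assuming the `DataGranules` query class; the collection short name is a placeholder:

```python
from earthaccess import DataGranules

# Wildcards match against the readable_granule_name metadata field.
query = DataGranules().short_name("MODGRNLD").granule_name("MODGRNLD.*.daily.*")
```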
earthaccess/store.py (6 changes: 3 additions & 3 deletions)

@@ -106,7 +106,7 @@ def __init__(self, auth: Any, pre_authorize: bool = False) -> None:
self._requests_cookies: Dict[str, Any] = {}
self.set_requests_session(oauth_profile)
if pre_authorize:
- # collect cookies from other daacs
+ # collect cookies from other DAACs
for url in DAAC_TEST_URLS:
self.set_requests_session(url)

@@ -348,7 +348,7 @@ def _open_granules(
if granules[0].cloud_hosted:
access = "direct"
provider = granules[0]["meta"]["provider-id"]
- # if the data has its own S3 credentials endpoint we'll use it
+ # if the data has its own S3 credentials endpoint, we will use it
endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"])
if endpoint is not None:
print(f"using endpoint: {endpoint}")
@@ -473,7 +473,7 @@ def get(
files = self._get(granules, local_path, provider, threads)
return files
else:
raise ValueError("List of URLs or DataGranule isntances expected")
raise ValueError("List of URLs or DataGranule instances expected")

@singledispatchmethod
def _get(
tests/integration/test_kerchunk.py (20 changes: 10 additions & 10 deletions)

@@ -20,21 +20,21 @@


@pytest.fixture(scope="module")
- def granuales():
-     granuales = earthaccess.search_data(
+ def granules():
+     granules = earthaccess.search_data(
count=2,
short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205",
cloud_hosted=True,
)
- return granuales
+ return granules


@pytest.mark.parametrize("protocol", ["", "file://"])
- def test_consolidate_metadata_outfile(tmp_path, granuales, protocol):
+ def test_consolidate_metadata_outfile(tmp_path, granules, protocol):
outfile = f"{protocol}{tmp_path / 'metadata.json'}"
assert not os.path.exists(outfile)
result = earthaccess.consolidate_metadata(
- granuales,
+ granules,
outfile=outfile,
access="indirect",
kerchunk_options={"concat_dims": "Time"},
@@ -43,9 +43,9 @@ def test_consolidate_metadata_outfile(tmp_path, granuales, protocol):
assert result == outfile


- def test_consolidate_metadata_memory(tmp_path, granuales):
+ def test_consolidate_metadata_memory(tmp_path, granules):
result = earthaccess.consolidate_metadata(
- granuales,
+ granules,
access="indirect",
kerchunk_options={"concat_dims": "Time"},
)
@@ -54,18 +54,18 @@ def test_consolidate_metadata(tmp_path, granuales, output):


@pytest.mark.parametrize("output", ["file", "memory"])
- def test_consolidate_metadata(tmp_path, granuales, output):
+ def test_consolidate_metadata(tmp_path, granules, output):
xr = pytest.importorskip("xarray")
# Open directly with `earthaccess.open`
- expected = xr.open_mfdataset(earthaccess.open(granuales))
+ expected = xr.open_mfdataset(earthaccess.open(granules))

# Open with kerchunk consolidated metadata file
if output == "file":
kwargs = {"outfile": tmp_path / "metadata.json"}
else:
kwargs = {}
metadata = earthaccess.consolidate_metadata(
- granuales, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs
+ granules, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs
)

fs = earthaccess.get_fsspec_https_session()
