Skip to content

Commit

Permalink
Fixed some glaring issues
Browse files Browse the repository at this point in the history
  • Loading branch information
Sherwin-14 committed Oct 23, 2024
1 parent 0e76a23 commit fc59282
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 35 deletions.
30 changes: 24 additions & 6 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,30 @@ and this project uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html)
[@betolink](https://github.com/betolink))
- Add example PR links to pull request template
([#756](https://github.com/nsidc/earthaccess/issues/756))
([@Sherwin-14](https://github.com/Sherwin-14),
[@mfisher87](https://github.com/mfisher87))
- Add Contributing Naming Convention document
([#532](https://github.com/nsidc/earthaccess/issues/532))
([@Sherwin-14](https://github.com/Sherwin-14),
[@mfisher87](https://github.com/mfisher87))
[**@Sherwin-14**](https://github.com/Sherwin-14),
[**@mfisher87**](https://github.com/mfisher87)

- Added Contributing Naming Convention document
([#532](https://github.com/nsidc/earthaccess/issues/532))
[**@Sherwin-14**](https://github.com/Sherwin-14),
[**@mfisher87**](https://github.com/mfisher87)

### Fixed

- Removed Broken Link "Introduction to NASA earthaccess"
([#779](https://github.com/nsidc/earthaccess/issues/779))
([**@Sherwin-14**](https://github.com/Sherwin-14))
- Restore automation for tidying notebooks used in documentation
([#788](https://github.com/nsidc/earthaccess/issues/788))
([**@itcarroll**](https://github.com/itcarroll))
- Remove the base class on `EarthAccessFile` to fix method resolution
([#610](https://github.com/nsidc/earthaccess/issues/610))
([**@itcarroll**](https://github.com/itcarroll))
- Fixed earthaccess.download() ignoring errors
([#581](https://github.com/nsidc/earthaccess/issues/581))
([**@Sherwin-14**](https://github.com/Sherwin-14),
[**@chuckwondo**](https://github.com/chuckwondo),
[**@mfisher87**](https://github.com/mfisher87))

### Removed

Expand Down
4 changes: 2 additions & 2 deletions earthaccess/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
import s3fs
from fsspec import AbstractFileSystem
from typing_extensions import Any, Dict, List, Optional, Union, deprecated, Mapping
from typing_extensions import Any, Dict, List, Mapping, Optional, Union, deprecated

import earthaccess
from earthaccess.services import DataServices
Expand Down Expand Up @@ -238,7 +238,7 @@ def download(
try:
results = earthaccess.__store__.get(
granules, local_path, provider, threads, pqdm_kwargs
)
)
except AttributeError as err:
logger.error(
f"{err}: You must call earthaccess.login() before you can download data"
Expand Down
49 changes: 22 additions & 27 deletions earthaccess/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,7 @@ def multi_thread_open(data: tuple[str, Optional[DataGranule]]) -> EarthAccessFil
return EarthAccessFile(fs.open(urls), granule) # type: ignore

fileset = pqdm(
url_mapping.items(),
multi_thread_open,
n_jobs=threads,
**pqdm_kwargs
url_mapping.items(), multi_thread_open, n_jobs=threads, **pqdm_kwargs
)
return fileset

Expand Down Expand Up @@ -356,7 +353,7 @@ def open(
A list of "file pointers" to remote (i.e. s3 or https) files.
"""
if len(granules):
return self._open(granules, provider,**pqdm_kwargs)
return self._open(granules, provider, pqdm_kwargs)
return []

@singledispatchmethod
Expand Down Expand Up @@ -410,13 +407,13 @@ def _open_granules(
else:
logger.info(f"using provider: {provider}")
s3_fs = self.get_s3_filesystem(provider=provider)

url_mapping = _get_url_granule_mapping(granules, access)
if s3_fs is not None:
try:
fileset = _open_files(
url_mapping, fs=s3_fs, threads=threads, **pqdm_kwargs
)
url_mapping, fs=s3_fs, threads=threads, pqdm_kwargs=pqdm_kwargs
)
except Exception as e:
raise RuntimeError(
"An exception occurred while trying to access remote files on S3. "
Expand All @@ -425,13 +422,13 @@ def _open_granules(
) from e
else:
fileset = self._open_urls_https(
url_mapping, threads=threads, **pqdm_kwargs
url_mapping, threads=threads, pqdm_kwargs=pqdm_kwargs
)
return fileset
else:
url_mapping = _get_url_granule_mapping(granules, access="on_prem")
fileset = self._open_urls_https(
url_mapping, threads=threads, **pqdm_kwargs
url_mapping, threads=threads, pqdm_kwargs=pqdm_kwargs
)
return fileset

Expand Down Expand Up @@ -466,7 +463,10 @@ def _open_urls(
if s3_fs is not None:
try:
fileset = _open_files(
url_mapping, fs=s3_fs, threads=threads, **pqdm_kwargs
url_mapping,
fs=s3_fs,
threads=threads,
pqdm_kwargs=pqdm_kwargs,
)
except Exception as e:
raise RuntimeError(
Expand All @@ -486,7 +486,7 @@ def _open_urls(
raise ValueError(
"We cannot open S3 links when we are not in-region, try using HTTPS links"
)
fileset = self._open_urls_https(url_mapping, threads,**pqdm_kwargs)
fileset = self._open_urls_https(url_mapping, threads, pqdm_kwargs)
return fileset

def get(
Expand Down Expand Up @@ -523,16 +523,8 @@ def get(
elif isinstance(local_path, str):
local_path = Path(local_path)

pqdm_kwargs = {
"exception_behavior": "immediate",
"n_jobs": threads,
**pqdm_kwargs,
}

if len(granules):
files = self._get(
granules, local_path, provider, threads, **pqdm_kwargs
)
files = self._get(granules, local_path, provider, threads, pqdm_kwargs)
return files
else:
raise ValueError("List of URLs or DataGranule instances expected")
Expand Down Expand Up @@ -597,8 +589,8 @@ def _get_urls(
else:
# if we are not in AWS
return self._download_onprem_granules(
data_links, local_path, threads, **pqdm_kwargs
)
data_links, local_path, threads, pqdm_kwargs
)

@_get.register
def _get_granules(
Expand Down Expand Up @@ -649,7 +641,7 @@ def _get_granules(
# if the data are cloud-based, but we are not in AWS,
# it will be downloaded as if it was on prem
return self._download_onprem_granules(
data_links, local_path, threads, **pqdm_kwargs
data_links, local_path, threads, pqdm_kwargs
)

def _download_file(self, url: str, directory: Path) -> str:
Expand Down Expand Up @@ -688,7 +680,11 @@ def _download_file(self, url: str, directory: Path) -> str:
return str(path)

def _download_onprem_granules(
self, urls: List[str], directory: Path, threads: int = 8, pqdm_kwargs: Optional[Mapping[str, Any]] = None,
self,
urls: List[str],
directory: Path,
threads: int = 8,
pqdm_kwargs: Optional[Mapping[str, Any]] = None,
) -> List[Any]:
"""Downloads a list of URLS into the data directory.
Expand Down Expand Up @@ -719,7 +715,6 @@ def _download_onprem_granules(
self._download_file,
n_jobs=threads,
argument_type="args",
exception_behaviour=exception_behavior,
**pqdm_kwargs
)
return results
Expand All @@ -732,7 +727,7 @@ def _open_urls_https(
https_fs = self.get_fsspec_session()

try:
return _open_files(url_mapping, https_fs, threads,**pqdm_kwargs)
return _open_files(url_mapping, https_fs, threads, **pqdm_kwargs)
except Exception:
logger.exception(
"An exception occurred while trying to access remote files via HTTPS"
Expand Down

0 comments on commit fc59282

Please sign in to comment.