From 95be04cef4491da5e1eee6070fae481f40689c59 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Fri, 10 Nov 2023 11:50:10 -0600 Subject: [PATCH] Raise exceptions instead of printing and returning None --- earthaccess/store.py | 76 ++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/earthaccess/store.py b/earthaccess/store.py index cfe7bc79..68fec997 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -230,10 +230,9 @@ def get_s3fs_session( self.initial_ts = datetime.datetime.now() return deepcopy(self.s3_fs) else: - print( + raise ValueError( "A valid Earthdata login instance is required to retrieve S3 credentials" ) - return None @lru_cache def get_fsspec_session(self) -> fsspec.AbstractFileSystem: @@ -269,7 +268,7 @@ def open( self, granules: Union[List[str], List[DataGranule]], provider: Optional[str] = None, - ) -> Union[List[Any], None]: + ) -> List[Any]: """Returns a list of fsspec file-like objects that can be used to access files hosted on S3 or HTTPS by third party libraries like xarray. @@ -280,15 +279,14 @@ def open( """ if len(granules): return self._open(granules, provider) - print("The granules list is empty, moving on...") - return None + return [] @singledispatchmethod def _open( self, granules: Union[List[str], List[DataGranule]], provider: Optional[str] = None, - ) -> Union[List[Any], None]: + ) -> List[Any]: """Returns a list of fsspec file-like objects that can be used to access files hosted on S3 or HTTPS by third party libraries like xarray. 
@@ -305,17 +303,16 @@ def _open_granules( granules: List[DataGranule], provider: Optional[str] = None, threads: Optional[int] = 8, - ) -> Union[List[Any], None]: + ) -> List[Any]: fileset: List = [] data_links: List = [] total_size = round(sum([granule.size() for granule in granules]) / 1024, 2) - print(f" Opening {len(granules)} granules, approx size: {total_size} GB") + print(f"Opening {len(granules)} granules, approx size: {total_size} GB") if self.auth is None: - print( + raise ValueError( "A valid Earthdata login instance is required to retrieve credentials" ) - return None if self.running_in_aws: if granules[0].cloud_hosted: @@ -347,13 +344,12 @@ def _open_granules( fs=s3_fs, threads=threads, ) - except Exception: - print( - "An exception occurred while trying to access remote files on S3: " - "This may be caused by trying to access the data outside the us-west-2 region" + except Exception as e: + raise RuntimeError( + "An exception occurred while trying to access remote files on S3. " + "This may be caused by trying to access the data outside the us-west-2 region." 
f"Exception: {traceback.format_exc()}" - ) - return None + ) from e else: fileset = self._open_urls_https(data_links, granules, threads=threads) return fileset @@ -373,7 +369,7 @@ def _open_urls( granules: List[str], provider: Optional[str] = None, threads: Optional[int] = 8, - ) -> Union[List[Any], None]: + ) -> List[Any]: fileset: List = [] data_links: List = [] @@ -384,15 +380,13 @@ def _open_urls( provider = provider data_links = granules else: - print( + raise ValueError( f"Schema for {granules[0]} is not recognized, must be an HTTP or S3 URL" ) - return None if self.auth is None: - print( + raise ValueError( "A valid Earthdata login instance is required to retrieve S3 credentials" ) - return None if self.running_in_aws and granules[0].startswith("s3"): if provider is not None: @@ -405,27 +399,24 @@ def _open_urls( fs=s3_fs, threads=threads, ) - except Exception: - print( - "An exception occurred while trying to access remote files on S3: " - "This may be caused by trying to access the data outside the us-west-2 region" + except Exception as e: + raise RuntimeError( + "An exception occurred while trying to access remote files on S3. " + "This may be caused by trying to access the data outside the us-west-2 region." f"Exception: {traceback.format_exc()}" - ) - return None + ) from e else: print(f"Provider {provider} has no valid cloud credentials") return fileset else: - print( + raise ValueError( "earthaccess cannot derive the DAAC provider from URLs only, a provider is needed e.g. POCLOUD" ) - return None else: if granules[0].startswith("s3"): - print( + raise ValueError( "We cannot open S3 links when we are not in-region, try using HTTPS links" ) - return None fileset = self._open_urls_https(data_links, granules, threads) return fileset @@ -435,7 +426,7 @@ def get( local_path: Optional[str] = None, provider: Optional[str] = None, threads: int = 8, - ) -> Union[None, List[str]]: + ) -> List[str]: """Retrieves data granules from a remote storage system. 
* If we run this in the cloud we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda) @@ -463,8 +454,7 @@ def get( files = self._get(granules, local_path, provider, threads) return files else: - print("List of URLs or DataGranule isntances expected") - return None + raise ValueError("List of URLs or DataGranule isntances expected") @singledispatchmethod def _get( @@ -473,7 +463,7 @@ def _get( local_path: str, provider: Optional[str] = None, threads: int = 8, - ) -> Union[None, List[str]]: + ) -> List[str]: """Retrieves data granules from a remote storage system. * If we run this in the cloud we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda) @@ -491,8 +481,7 @@ def _get( Returns: None """ - print("List of URLs or DataGranule isntances expected") - return None + raise NotImplementedError(f"Cannot _get {granules}") @_get.register def _get_urls( @@ -501,15 +490,14 @@ def _get_urls( local_path: str, provider: Optional[str] = None, threads: int = 8, - ) -> Union[None, List[str]]: + ) -> List[str]: data_links = granules downloaded_files: List = [] if provider is None and self.running_in_aws and "cumulus" in data_links[0]: - print( + raise ValueError( "earthaccess can't yet guess the provider for cloud collections, " "we need to use one from earthaccess.list_cloud_providers()" ) - return None if self.running_in_aws and data_links[0].startswith("s3"): print(f"Accessing cloud dataset using provider: {provider}") s3_fs = self.get_s3fs_session(provider=provider) @@ -532,7 +520,7 @@ def _get_granules( local_path: str, provider: Optional[str] = None, threads: int = 8, - ) -> Union[None, List[str]]: + ) -> List[str]: data_links: List = [] downloaded_files: List = [] provider = granules[0]["meta"]["provider-id"] @@ -615,13 +603,11 @@ def _download_onprem_granules( :returns: None """ if urls is None: - print("The granules didn't provide a valid GET DATA link") - return None + raise ValueError("The granules didn't provide a valid GET DATA 
link") if self.auth is None: - print( + raise ValueError( "We need to be logged into NASA EDL in order to download data granules" ) - return [] if not os.path.exists(directory): os.makedirs(directory)