From e3e765200c72004db897557816cab454bd131c03 Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Fri, 5 Jan 2024 14:36:23 -0500 Subject: [PATCH] Revert "Expand Variables class to read s3 urls (#464)" This reverts commit 8d7db9c5152f830e65aa599731dc19a99c845057. --- .../IS2_data_variables.ipynb | 13 +-- icepyx/core/is2ref.py | 110 +++++------------- icepyx/core/query.py | 4 - icepyx/core/validate_inputs.py | 13 --- icepyx/core/variables.py | 33 +++--- 5 files changed, 51 insertions(+), 122 deletions(-) diff --git a/doc/source/example_notebooks/IS2_data_variables.ipynb b/doc/source/example_notebooks/IS2_data_variables.ipynb index c66445731..78a250789 100644 --- a/doc/source/example_notebooks/IS2_data_variables.ipynb +++ b/doc/source/example_notebooks/IS2_data_variables.ipynb @@ -15,7 +15,7 @@ "\n", "A given ICESat-2 product may have over 200 variable + path combinations.\n", "icepyx includes a custom `Variables` module that is \"aware\" of the ATLAS sensor and how the ICESat-2 data products are stored.\n", - "The module can be accessed independently and can also be accessed as a component of a `Query` object or `Read` object.\n", + "The module can be accessed independently, and can also be accessed as a component of a `Query` object or `Read` object.\n", "\n", "This notebook illustrates in detail how the `Variables` module behaves. We use the module independently and also show how powerful it is directly in the icepyx workflow using a `Query` data access example.\n", "Module usage using `Query` is analogous through an icepyx ICESat-2 `Read` object.\n", @@ -75,7 +75,7 @@ "There are three ways to create or access an ICESat-2 Variables object in icepyx:\n", "1. Access via the `.order_vars` property of a Query object\n", "2. Access via the `.vars` property of a Read object\n", - "3. Create a stand-alone ICESat-2 Variables object using a local file, cloud file, or a product name\n", + "3. Create a stand-alone ICESat-2 Variables object using a local file or a product name\n", "\n", "An example of each of these is shown below." ] @@ -180,11 +180,8 @@ "### 3. Create a stand-alone Variables object\n", "\n", "You can also generate an independent Variables object. This can be done using either:\n", - "1. The filepath to a local or cloud file you'd like a variables list for\n", - "2. The product name (and optionally version) of a an ICESat-2 product\n", - "\n", - "*Note: Cloud data access requires a valid Earthdata login; \n", - "you will be prompted to log in if you are not already authenticated.*" + "1. The filepath to a file you'd like a variables list for\n", + "2. The product name (and optionally version) of a an ICESat-2 product" ] }, { @@ -258,7 +255,7 @@ }, "outputs": [], "source": [ - "v = ipx.Variables(product='ATL03', version='006')" + "v = ipx.Variables(product='ATL03', version='004')" ] }, { diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index d49d15f04..a90c8fafa 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -5,10 +5,11 @@ import warnings from xml.etree import ElementTree as ET -import earthaccess +import icepyx # ICESat-2 specific reference functions +# options to get customization options for ICESat-2 data (though could be used generally) def _validate_product(product): @@ -47,6 +48,9 @@ def _validate_product(product): return product +# DevGoal: See if there's a way to dynamically get this list so it's automatically updated + + def _validate_OA_product(product): """ Confirm a valid ICESat-2 product was specified @@ -83,7 +87,6 @@ def about_product(prod): # DevGoal: use a mock of this output to test later functions, such as displaying options and widgets, etc. -# options to get customization options for ICESat-2 data (though could be used generally) def _get_custom_options(session, product, version): """ Get lists of what customization options are available for the product from NSIDC. @@ -327,7 +330,6 @@ def gt2spot(gt, sc_orient): return np.uint8(spot) - def latest_version(product): """ Determine the most recent version available for the given product. @@ -338,86 +340,38 @@ def latest_version(product): '006' """ _about_product = about_product(product) - return max([entry["version_id"] for entry in _about_product["feed"]["entry"]]) - + return max( + [entry["version_id"] for entry in _about_product["feed"]["entry"]] + ) -def extract_product(filepath, auth=None): +def extract_product(filepath): """ - Read the product type from the metadata of the file. Valid for local or s3 files, but must - provide an auth object if reading from s3. Return the product as a string. - - Parameters - ---------- - filepath: string - local or remote location of a file. Could be a local string or an s3 filepath - auth: earthaccess.auth.Auth, default None - An earthaccess authentication object. Optional, but necessary if accessing data in an - s3 bucket. + Read the product type from the metadata of the file. Return the product as a string. """ - # Generate a file reader object relevant for the file location - if filepath.startswith("s3"): - if not auth: - raise AttributeError( - "Must provide credentials to `auth` if accessing s3 data" - ) - # Read the s3 file - s3 = earthaccess.get_s3fs_session(daac="NSIDC", provider=auth) - f = h5py.File(s3.open(filepath, "rb")) - else: - # Otherwise assume a local filepath. Read with h5py. - f = h5py.File(filepath, "r") - - # Extract the product information - try: - product = f.attrs["short_name"] - if isinstance(product, bytes): - # For most products the short name is stored in a bytes string - product = product.decode() - elif isinstance(product, np.ndarray): - # ATL14 saves the short_name as an array ['ATL14'] - product = product[0] - product = _validate_product(product) - except KeyError: - raise "Unable to parse the product name from file metadata" - # Close the file reader - f.close() + with h5py.File(filepath, 'r') as f: + try: + product = f.attrs['short_name'] + if isinstance(product, bytes): + # For most products the short name is stored in a bytes string + product = product.decode() + elif isinstance(product, np.ndarray): + # ATL14 saves the short_name as an array ['ATL14'] + product = product[0] + product = _validate_product(product) + except KeyError: + raise 'Unable to parse the product name from file metadata' return product - -def extract_version(filepath, auth=None): +def extract_version(filepath): """ - Read the version from the metadata of the file. Valid for local or s3 files, but must - provide an auth object if reading from s3. Return the version as a string. - - Parameters - ---------- - filepath: string - local or remote location of a file. Could be a local string or an s3 filepath - auth: earthaccess.auth.Auth, default None - An earthaccess authentication object. Optional, but necessary if accessing data in an - s3 bucket. + Read the version from the metadata of the file. Return the version as a string. """ - # Generate a file reader object relevant for the file location - if filepath.startswith("s3"): - if not auth: - raise AttributeError( - "Must provide credentials to `auth` if accessing s3 data" - ) - # Read the s3 file - s3 = earthaccess.get_s3fs_session(daac="NSIDC", provider=auth) - f = h5py.File(s3.open(filepath, "rb")) - else: - # Otherwise assume a local filepath. Read with h5py. - f = h5py.File(filepath, "r") - - # Read the version information - try: - version = f["METADATA"]["DatasetIdentification"].attrs["VersionID"] - if isinstance(version, np.ndarray): - # ATL14 stores the version as an array ['00x'] - version = version[0] - except KeyError: - raise "Unable to parse the version from file metadata" - # Close the file reader - f.close() + with h5py.File(filepath, 'r') as f: + try: + version = f['METADATA']['DatasetIdentification'].attrs['VersionID'] + if isinstance(version, np.ndarray): + # ATL14 stores the version as an array ['00x'] + version = version[0] + except KeyError: + raise 'Unable to parse the version from file metadata' return version diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 4ffe4c241..8700d5655 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -350,10 +350,6 @@ class Query(GenQuery, EarthdataAuthMixin): reference ground tracks are used. Example: "0594" files : string, default None A placeholder for future development. Not used for any purposes yet. - auth : earthaccess.auth.Auth, default None - An earthaccess authentication object. Available as an argument so an existing - earthaccess.auth.Auth object can be used for authentication. If not given, a new auth - object will be created whenever authentication is needed. Returns ------- diff --git a/icepyx/core/validate_inputs.py b/icepyx/core/validate_inputs.py index d74768eea..c7ba55a6d 100644 --- a/icepyx/core/validate_inputs.py +++ b/icepyx/core/validate_inputs.py @@ -104,16 +104,3 @@ def tracks(track): warnings.warn("Listed Reference Ground Track is not available") return track_list - -def check_s3bucket(path): - """ - Check if the given path is an s3 path. Raise a warning if the data being referenced is not - in the NSIDC bucket - """ - split_path = path.split('/') - if split_path[0] == 's3:' and split_path[2] != 'nsidc-cumulus-prod-protected': - warnings.warn( - 's3 data being read from outside the NSIDC data bucket. Icepyx can ' - 'read this data, but available data lists may not be accurate.', stacklevel=2 - ) - return path diff --git a/icepyx/core/variables.py b/icepyx/core/variables.py index 4c52003df..94645ca94 100644 --- a/icepyx/core/variables.py +++ b/icepyx/core/variables.py @@ -48,10 +48,11 @@ class Variables(EarthdataAuthMixin): Dictionary (key:values) of available variable names (keys) and paths (values). wanted : dictionary, default None As avail, but for the desired list of variables - auth : earthaccess.auth.Auth, default None - An earthaccess authentication object. Available as an argument so an existing - earthaccess.auth.Auth object can be used for authentication. If not given, a new auth - object will be created whenever authentication is needed. + session : requests.session object + A session object authenticating the user to download data using their Earthdata login information. + The session object will automatically be passed from the query object if you + have successfully logged in there. + """ def __init__( @@ -74,25 +75,16 @@ def __init__( if path and product: raise TypeError( - 'Please provide either a path or a product. If a path is provided ', + 'Please provide either a filepath or a product. If a filepath is provided ', 'variables will be read from the file. If a product is provided all available ', 'variables for that product will be returned.' ) - - # initialize authentication properties - EarthdataAuthMixin.__init__(self, auth=auth) # Set the product and version from either the input args or the file if path: - self._path = val.check_s3bucket(path) - # Set up auth - if self._path.startswith('s3'): - auth = self.auth - else: - auth = None - # Read the product and version from the file - self._product = is2ref.extract_product(self._path, auth=auth) - self._version = is2ref.extract_version(self._path, auth=auth) + self._path = path + self._product = is2ref.extract_product(self._path) + self._version = is2ref.extract_version(self._path) elif product: # Check for valid product string self._product = is2ref._validate_product(product) @@ -100,7 +92,10 @@ def __init__( # If version is not specified by the user assume the most recent version self._version = val.prod_version(is2ref.latest_version(self._product), version) else: - raise TypeError('Either a path or a product need to be given as input arguments.') + raise TypeError('Either a filepath or a product need to be given as input arguments.') + + # initialize authentication properties + EarthdataAuthMixin.__init__(self, auth=auth) self._avail = avail self.wanted = wanted @@ -143,7 +138,7 @@ def avail(self, options=False, internal=False): """ if not hasattr(self, "_avail") or self._avail == None: - if not hasattr(self, 'path') or self.path.startswith('s3'): + if not hasattr(self, 'path'): self._avail = is2ref._get_custom_options( self.session, self.product, self.version )["variables"]