From 8263068417f16671806ef26f78dd0a424a5d5461 Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Fri, 5 Jan 2024 14:36:23 -0500 Subject: [PATCH] Revert "Expand icepyx to read s3 data (#468)" This reverts commit 690ef1795614ac11776935902917580295d38b3b. --- .../IS2_cloud_data_access.ipynb | 282 +++--------------- .../documentation/classes_dev_uml.svg | 111 ++++--- .../documentation/classes_user_uml.svg | 77 +++-- .../documentation/packages_user_uml.svg | 100 +++---- icepyx/core/is2ref.py | 10 +- icepyx/core/read.py | 100 +------ icepyx/core/variables.py | 1 - 7 files changed, 191 insertions(+), 490 deletions(-) diff --git a/doc/source/example_notebooks/IS2_cloud_data_access.ipynb b/doc/source/example_notebooks/IS2_cloud_data_access.ipynb index 5bf5d1a98..fa0931c8a 100644 --- a/doc/source/example_notebooks/IS2_cloud_data_access.ipynb +++ b/doc/source/example_notebooks/IS2_cloud_data_access.ipynb @@ -12,59 +12,35 @@ "## Notes\n", "1. ICESat-2 data became publicly available on the cloud on 29 September 2022. Thus, access methods and example workflows are still being developed by NSIDC, and the underlying code in icepyx will need to be updated now that these data (and the associated metadata) are available. We appreciate your patience and contributions (e.g. reporting bugs, sharing your code, etc.) during this transition!\n", "2. This example and the code it describes are part of ongoing development. Current limitations to using these features are described throughout the example, as appropriate.\n", - "3. You **MUST** be working within an AWS instance. Otherwise, you will get a permissions error." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "## Querying for data and finding s3 urls" + "3. You **MUST** be working within an AWS instance. Otherwise, you will get a permissions error.\n", + "4. Cloud authentication is still more user-involved than we'd like. We're working to address this - let us know if you'd like to join the conversation!" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ + "import earthaccess\n", "import icepyx as ipx" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Make sure the user sees important warnings if they try to read a lot of data from the cloud\n", - "import warnings\n", - "warnings.filterwarnings(\"always\")" - ] - }, { "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, + "metadata": {}, "source": [ - "We will start the way we often do: by creating an icepyx Query object." + "Create an icepyx Query object" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ + "# bounding box\n", + "# \"producerGranuleId\": \"ATL03_20191130221008_09930503_004_01.h5\",\n", "short_name = 'ATL03'\n", "spatial_extent = [-45, 58, -35, 75]\n", "date_range = ['2019-11-30','2019-11-30']" @@ -73,32 +49,25 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "reg = ipx.Query(short_name, spatial_extent, date_range)" + "reg=ipx.Query(short_name, spatial_extent, date_range)" ] }, { "cell_type": "markdown", - "metadata": { - "tags": [], - "user_expressions": [] - }, + "metadata": {}, "source": [ - "### Get the granule s3 urls\n", - "\n", - "With this query object you can get a list of available granules. This function returns a list containing the list of the granule IDs and a list of the corresponding urls. Use `cloud=True` to get the needed s3 urls." + "## Get the granule s3 urls\n", + "You must specify `cloud=True` to get the needed s3 urls.\n", + "This function returns a list containing the list of the granule IDs and a list of the corresponding urls." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "gran_ids = reg.avail_granules(ids=True, cloud=True)\n", @@ -111,114 +80,19 @@ "user_expressions": [] }, "source": [ - "## Determining variables of interest" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "There are several ways to view available variables. One is to use the existing Query object:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "reg.order_vars.avail()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "Another way is to use the variables module:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ipx.Variables(product=short_name).avail()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "We can also do this using a specific s3 filepath from the Query object:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ipx.Variables(path=gran_ids[1][0]).avail()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "From any of these methods we can see that `h_ph` is a variable for this data product, so we will read that variable in the next step." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "#### A Note on listing variables using s3 urls\n", + "## Log in to Earthdata and generate an s3 token\n", + "You can use icepyx's existing login functionality to generate your s3 data access token, which will be valid for *one* hour. The icepyx module will renew the token for you after an hour, but if viewing your token over the course of several hours you may notice the values will change.\n", "\n", - "We can use the Variables module with an s3 url to explore available data variables the same way we do with local files. An important difference, however, is how the available variables list is created. When reading a local file the variables module will traverse the entire file and search for variables that are present in that file. This method it too time intensive with the s3 data, so instead the the product / version of the data product is read from the file and all possible variables associated with that product/version are reporting as available. As long as you are using the NSIDC provided s3 paths provided via Earthdata search and the Query object these lists will be the same." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [], - "user_expressions": [] - }, - "source": [ - "#### A Note on authentication\n", - "\n", - "Notice that accessing cloud data requires two layers of authentication: 1) authenticating with your Earthdata Login 2) authenticating for cloud access. These both happen behind the scenes, without the need for users to provide any explicit commands.\n", - "\n", - "Icepyx uses earthaccess to generate your s3 data access token, which will be valid for *one* hour. Icepyx will also renew the token for you after an hour, so if viewing your token over the course of several hours you may notice the values will change.\n", - "\n", - "If you do want to see your s3 credentials, you can access them using:" + "You can access your s3 credentials using:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "# uncommenting the line below will print your temporary aws login credentials\n", + "# uncommenting the line below will print your temporary login credentials\n", "# reg.s3login_credentials" ] }, @@ -237,136 +111,68 @@ }, { "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "## Choose a data file and access the data\n", - "\n", - "**Note: If you get a PermissionDenied Error when trying to read in the data, you may not be sending your request from an AWS hub in us-west2. We're currently working on how to alert users if they will not be able to access ICESat-2 data in the cloud for this reason**\n", - "\n", - "We are ready to read our data! We do this by creating a reader object and using the s3 url returned from the Query object." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# the first index, [1], gets us into the list of s3 urls\n", - "# the second index, [0], gets us the first entry in that list.\n", - "s3url = gran_ids[1][0]\n", - "# s3url = 's3://nsidc-cumulus-prod-protected/ATLAS/ATL03/004/2019/11/30/ATL03_20191130221008_09930503_004_01.h5'" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [], - "user_expressions": [] - }, + "metadata": {}, "source": [ - "Create the Read object" + "## Set up your s3 file system using your credentials" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "reader = ipx.Read(s3url)" + "s3 = earthaccess.get_s3fs_session(daac='NSIDC', provider=reg.s3login_credentials)" ] }, { "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, + "metadata": {}, "source": [ - "This reader object gives us yet another way to view available variables." + "## Select an s3 url and access the data\n", + "Data read in capabilities for cloud data are coming soon in icepyx (targeted Spring 2023). Stay tuned and we'd love for you to join us and contribute!\n", + "\n", + "**Note: If you get a PermissionDenied Error when trying to read in the data, you may not be sending your request from an AWS hub in us-west2. We're currently working on how to alert users if they will not be able to access ICESat-2 data in the cloud for this reason**" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "reader.vars.avail()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "Next, we append our desired variable to the `wanted_vars` list:" + "# the first index, [1], gets us into the list of s3 urls\n", + "# the second index, [0], gets us the first entry in that list.\n", + "s3url = gran_ids[1][0]\n", + "# s3url = 's3://nsidc-cumulus-prod-protected/ATLAS/ATL03/004/2019/11/30/ATL03_20191130221008_09930503_004_01.h5'" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "reader.vars.append(var_list=['h_ph'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "Finally, we load the data" + "import h5py\n", + "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "\n", - "# This may take 5-10 minutes\n", - "reader.load()" + "%time f = h5py.File(s3.open(s3url,'rb'),'r')" ] }, { "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, - "source": [ - "### Some important caveats\n", - "\n", - "While the cloud data reading is functional within icepyx, it is very slow. Approximate timing shows it takes ~6 minutes of load time per variable per file from s3. Because of this you will recieve a warning if you try to load either more than three variables or two files at once.\n", - "\n", - "The slow load speed is a demonstration of the many steps involved in making cloud data actionable - the data supply chain needs optimized source data, efficient low level data readers, and high level libraries which are enabled to use the fastest low level data readers. Not all of these pieces fully developed right now, but the progress being made it exciting and there is lots of room for contribution!" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "user_expressions": [] - }, + "metadata": {}, "source": [ "#### Credits\n", - "* notebook by: Jessica Scheick and Rachel Wegener" + "* notebook by: Jessica Scheick\n", + "* historic source material: [is2-nsidc-cloud.py](https://gist.github.com/bradlipovsky/80ab6a7aff3d3524b9616a9fc176065e#file-is2-nsidc-cloud-py-L28) by Brad Lipovsky" ] } ], @@ -386,7 +192,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg index c5cd801aa..3409a38e5 100644 --- a/doc/source/user_guide/documentation/classes_dev_uml.svg +++ b/doc/source/user_guide/documentation/classes_dev_uml.svg @@ -30,20 +30,20 @@ icepyx.core.auth.EarthdataAuthMixin - -EarthdataAuthMixin - -_auth : NoneType -_s3_initial_ts : NoneType, datetime -_s3login_credentials : NoneType -_session : NoneType -auth -s3login_credentials -session - -__init__(auth) -__str__() -earthdata_login(uid, email, s3token): None + +EarthdataAuthMixin + +_auth : NoneType +_s3_initial_ts : NoneType, datetime +_s3login_credentials : NoneType +_session : NoneType +auth +s3login_credentials +session + +__init__(auth) +__str__() +earthdata_login(uid, email, s3token): None @@ -120,14 +120,14 @@ visualize_spatial_extent() - + icepyx.core.granules.Granules->icepyx.core.query.Query _granules - + icepyx.core.granules.Granules->icepyx.core.query.Query @@ -190,28 +190,28 @@ check_values() - + icepyx.core.APIformatting.Parameters->icepyx.core.query.Query _CMRparams - + icepyx.core.APIformatting.Parameters->icepyx.core.query.Query _reqparams - + icepyx.core.APIformatting.Parameters->icepyx.core.query.Query _subsetparams - + icepyx.core.APIformatting.Parameters->icepyx.core.query.Query @@ -220,8 +220,8 @@ icepyx.core.query.Query->icepyx.core.auth.EarthdataAuthMixin - - + + @@ -232,32 +232,25 @@ icepyx.core.read.Read - -Read - -_filelist : NoneType, list -_out_obj : Dataset -_product : NoneType, str -_read_vars -filelist -is_s3 -product -vars - -__init__(data_source, product, filename_pattern, catalog, glob_kwargs, out_obj_type) -_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) -_build_dataset_template(file) -_build_single_file_dataset(file, groups_list) -_check_source_for_pattern(source, filename_pattern) -_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) -_read_single_grp(file, grp_path) -load() - - - -icepyx.core.read.Read->icepyx.core.auth.EarthdataAuthMixin - - + +Read + +_filelist : NoneType, list +_out_obj : Dataset +_product : NoneType, str +_read_vars +filelist +product +vars + +__init__(data_source, product, filename_pattern, catalog, glob_kwargs, out_obj_type) +_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) +_build_dataset_template(file) +_build_single_file_dataset(file, groups_list) +_check_source_for_pattern(source, filename_pattern) +_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) +_read_single_grp(file, grp_path) +load() @@ -281,14 +274,14 @@ fmt_for_EGI() - + icepyx.core.spatial.Spatial->icepyx.core.query.GenQuery _spatial - + icepyx.core.spatial.Spatial->icepyx.core.query.GenQuery @@ -309,7 +302,7 @@ __str__() - + icepyx.core.temporal.Temporal->icepyx.core.query.GenQuery @@ -342,30 +335,30 @@ remove(all, var_list, beam_list, keyword_list) - + icepyx.core.variables.Variables->icepyx.core.auth.EarthdataAuthMixin - - + + - + icepyx.core.variables.Variables->icepyx.core.query.Query _order_vars - + icepyx.core.variables.Variables->icepyx.core.query.Query _order_vars - + icepyx.core.variables.Variables->icepyx.core.read.Read - - + + _read_vars diff --git a/doc/source/user_guide/documentation/classes_user_uml.svg b/doc/source/user_guide/documentation/classes_user_uml.svg index 256cc1794..d52b1c779 100644 --- a/doc/source/user_guide/documentation/classes_user_uml.svg +++ b/doc/source/user_guide/documentation/classes_user_uml.svg @@ -30,14 +30,14 @@ icepyx.core.auth.EarthdataAuthMixin - -EarthdataAuthMixin - -auth -s3login_credentials -session - -earthdata_login(uid, email, s3token): None + +EarthdataAuthMixin + +auth +s3login_credentials +session + +earthdata_login(uid, email, s3token): None @@ -95,14 +95,14 @@ visualize_spatial_extent() - + icepyx.core.granules.Granules->icepyx.core.query.Query _granules - + icepyx.core.granules.Granules->icepyx.core.query.Query @@ -158,28 +158,28 @@ check_values() - + icepyx.core.APIformatting.Parameters->icepyx.core.query.Query _CMRparams - + icepyx.core.APIformatting.Parameters->icepyx.core.query.Query _reqparams - + icepyx.core.APIformatting.Parameters->icepyx.core.query.Query _subsetparams - + icepyx.core.APIformatting.Parameters->icepyx.core.query.Query @@ -188,8 +188,8 @@ icepyx.core.query.Query->icepyx.core.auth.EarthdataAuthMixin - - + + @@ -200,21 +200,14 @@ icepyx.core.read.Read - -Read - -filelist -is_s3 -product -vars - -load() - - - -icepyx.core.read.Read->icepyx.core.auth.EarthdataAuthMixin - - + +Read + +filelist +product +vars + +load() @@ -231,14 +224,14 @@ fmt_for_EGI() - + icepyx.core.spatial.Spatial->icepyx.core.query.GenQuery _spatial - + icepyx.core.spatial.Spatial->icepyx.core.query.GenQuery @@ -256,7 +249,7 @@ - + icepyx.core.temporal.Temporal->icepyx.core.query.GenQuery @@ -279,30 +272,30 @@ remove(all, var_list, beam_list, keyword_list) - + icepyx.core.variables.Variables->icepyx.core.auth.EarthdataAuthMixin - - + + - + icepyx.core.variables.Variables->icepyx.core.query.Query _order_vars - + icepyx.core.variables.Variables->icepyx.core.query.Query _order_vars - + icepyx.core.variables.Variables->icepyx.core.read.Read - - + + _read_vars diff --git a/doc/source/user_guide/documentation/packages_user_uml.svg b/doc/source/user_guide/documentation/packages_user_uml.svg index a86000de8..8d8cf0dc9 100644 --- a/doc/source/user_guide/documentation/packages_user_uml.svg +++ b/doc/source/user_guide/documentation/packages_user_uml.svg @@ -4,11 +4,11 @@ - + packages_user_uml - + icepyx.core @@ -24,128 +24,122 @@ icepyx.core.auth - -icepyx.core.auth + +icepyx.core.auth icepyx.core.exceptions - -icepyx.core.exceptions + +icepyx.core.exceptions icepyx.core.granules - -icepyx.core.granules + +icepyx.core.granules icepyx.core.icesat2data - -icepyx.core.icesat2data + +icepyx.core.icesat2data icepyx.core.is2ref - -icepyx.core.is2ref + +icepyx.core.is2ref icepyx.core.query - -icepyx.core.query + +icepyx.core.query icepyx.core.query->icepyx.core.auth - - + + icepyx.core.query->icepyx.core.granules - - + + icepyx.core.variables - -icepyx.core.variables + +icepyx.core.variables icepyx.core.query->icepyx.core.variables - - + + icepyx.core.visualization - -icepyx.core.visualization + +icepyx.core.visualization icepyx.core.query->icepyx.core.visualization - - + + icepyx.core.read - -icepyx.core.read - - - -icepyx.core.read->icepyx.core.auth - - + +icepyx.core.read - + icepyx.core.read->icepyx.core.exceptions - - + + - + icepyx.core.read->icepyx.core.variables - - + + icepyx.core.spatial - -icepyx.core.spatial + +icepyx.core.spatial icepyx.core.temporal - -icepyx.core.temporal + +icepyx.core.temporal icepyx.core.validate_inputs - -icepyx.core.validate_inputs + +icepyx.core.validate_inputs - + icepyx.core.variables->icepyx.core.auth - - + + - + icepyx.core.variables->icepyx.core.exceptions - - + + diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index c51c631be..d49d15f04 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -338,7 +338,6 @@ def latest_version(product): '006' """ _about_product = about_product(product) - return max([entry["version_id"] for entry in _about_product["feed"]["entry"]]) @@ -362,7 +361,7 @@ def extract_product(filepath, auth=None): "Must provide credentials to `auth` if accessing s3 data" ) # Read the s3 file - s3 = earthaccess.get_s3fs_session(daac="NSIDC") + s3 = earthaccess.get_s3fs_session(daac="NSIDC", provider=auth) f = h5py.File(s3.open(filepath, "rb")) else: # Otherwise assume a local filepath. Read with h5py. @@ -380,7 +379,6 @@ def extract_product(filepath, auth=None): product = _validate_product(product) except KeyError: raise "Unable to parse the product name from file metadata" - # Close the file reader f.close() return product @@ -406,7 +404,7 @@ def extract_version(filepath, auth=None): "Must provide credentials to `auth` if accessing s3 data" ) # Read the s3 file - s3 = earthaccess.get_s3fs_session(daac="NSIDC") + s3 = earthaccess.get_s3fs_session(daac="NSIDC", provider=auth) f = h5py.File(s3.open(filepath, "rb")) else: # Otherwise assume a local filepath. Read with h5py. @@ -418,12 +416,8 @@ def extract_version(filepath, auth=None): if isinstance(version, np.ndarray): # ATL14 stores the version as an array ['00x'] version = version[0] - if isinstance(version, bytes): - version = version.decode() - except KeyError: raise "Unable to parse the version from file metadata" - # Close the file reader f.close() return version diff --git a/icepyx/core/read.py b/icepyx/core/read.py index f2dae6495..e136a1d64 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -1,15 +1,12 @@ import fnmatch import glob import os -import sys import warnings -import earthaccess +import h5py import numpy as np -from s3fs.core import S3File import xarray as xr -from icepyx.core.auth import EarthdataAuthMixin from icepyx.core.exceptions import DeprecationError import icepyx.core.is2ref as is2ref from icepyx.core.variables import Variables as Variables @@ -135,7 +132,7 @@ def _check_datasource(filepath): Then the dict can also contain a catalog key with a dict of catalogs for each of those types of inputs ("s3" or "local") In general, the issue we'll run into with multiple files is going to be merging during the read in, so it could be beneficial to not hide this too much and mandate users handle this intentionally outside the read in itself. - + this function was derived with some of the following resources, based on echopype https://github.com/OSOceanAcoustics/echopype/blob/ab5128fb8580f135d875580f0469e5fba3193b84/echopype/utils/io.py @@ -160,9 +157,9 @@ def _validate_source(source): # acceptable inputs (for now) are a single file or directory # would ultimately like to make a Path (from pathlib import Path; isinstance(source, Path)) an option # see https://github.com/OSOceanAcoustics/echopype/blob/ab5128fb8580f135d875580f0469e5fba3193b84/echopype/utils/io.py#L82 - assert isinstance(source, str), "You must enter your input as a string." + assert type(source) == str, "You must enter your input as a string." assert ( - os.path.isdir(source) is True or os.path.isfile(source) is True + os.path.isdir(source) == True or os.path.isfile(source) == True ), "Your data source string is not a valid data source." return True @@ -261,21 +258,8 @@ def _pattern_to_glob(pattern): return glob_path -def _confirm_proceed(): - """ - Ask the user if they wish to proceed with processing. If 'y', or 'yes', then continue. Any - other user input will abort the process. - """ - answer = input("Do you wish to proceed (not recommended) y/[n]?") - if answer.lower() in ["y", "yes"]: - pass - else: - warnings.warn("Aborting", stacklevel=2) - sys.exit(0) - - # To do: test this class and functions therein -class Read(EarthdataAuthMixin): +class Read: """ Data object to read ICESat-2 data into the specified formats. Provides flexiblity for reading nested hdf5 files into common analysis formats. @@ -283,18 +267,13 @@ class Read(EarthdataAuthMixin): Parameters ---------- data_source : string, List - A string or list which specifies the files to be read. - The string can be either: - 1) the path of a single file - 2) the path to a directory or - 3) a [glob string](https://docs.python.org/3/library/glob.html). + A string or list which specifies the files to be read. The string can be either: 1) the path of a single file 2) the path to a directory or 3) a [glob string](https://docs.python.org/3/library/glob.html). The List must be a list of strings, each of which is the path of a single file. product : string ICESat-2 data product ID, also known as "short name" (e.g. ATL03). Available data products can be found at: https://nsidc.org/data/icesat-2/data-sets - **Deprecation warning:** This argument is no longer required and will be deprecated in version 1.0.0. - The dataset product is read from the file metadata. + **Deprecation warning:** This argument is no longer required and will be deprecated in version 1.0.0. The dataset product is read from the file metadata. filename_pattern : string, default None String that shows the filename pattern as previously required for Intake's path_as_pattern argument. @@ -360,10 +339,6 @@ def __init__( if data_source is None: raise ValueError("data_source is a required arguemnt") - - # initialize authentication properties - EarthdataAuthMixin.__init__(self) - # Raise warnings for deprecated arguments if filename_pattern: warnings.warn( @@ -392,56 +367,19 @@ def __init__( assert pattern_ck self._filelist = filelist elif isinstance(data_source, list): - # if data_source is a list pass that directly to _filelist self._filelist = data_source elif os.path.isdir(data_source): - # if data_source is a directory glob search the directory and assign to _filelist data_source = os.path.join(data_source, "*") self._filelist = glob.glob(data_source, **glob_kwargs) - elif isinstance(data_source, str): - if data_source.startswith("s3"): - # if the string is an s3 path put it in the _filelist without globbing - self._filelist = [data_source] - else: - # data_source is a globable string - self._filelist = glob.glob(data_source, **glob_kwargs) else: - raise TypeError( - "data_source should be a list of files, a directory, the path to a file, " - "or a glob string." - ) - # Remove any directories from the list (these get generated during recursive - # glob search) + self._filelist = glob.glob(data_source, **glob_kwargs) + # Remove any directories from the list self._filelist = [f for f in self._filelist if not os.path.isdir(f)] # Create a dictionary of the products as read from the metadata product_dict = {} - self.is_s3 = [False] * len(self._filelist) - for i, file_ in enumerate(self._filelist): - # If the path is an s3 path set the respective element of self.is_s3 to True - if file_.startswith("s3"): - self.is_s3[i] = True - auth = self.auth - else: - auth = None - product_dict[file_] = is2ref.extract_product(file_, auth=auth) - - # Raise an error if there are both s3 and non-s3 paths present - if len(set(self.is_s3)) > 1: - raise TypeError( - "Mixed local and s3 paths is not supported. data_source must contain " - "only s3 paths or only local paths" - ) - self.is_s3 = self.is_s3[0] # Change is_s3 into one boolean value for _filelist - # Raise warning if more than 2 s3 files are given - if self.is_s3 is True and len(self._filelist) > 2: - warnings.warn( - "Processing more than two s3 files can take a prohibitively long time. " - "Approximate access time (using `.load()`) can exceed 6 minutes per data " - "variable.", - stacklevel=2, - ) - _confirm_proceed() + for file_ in self._filelist: + product_dict[file_] = is2ref.extract_product(file_) # Raise warnings or errors for multiple products or products not matching the user-specified product all_products = list(set(product_dict.values())) @@ -784,14 +722,6 @@ def load(self): "to add variables to the wanted variables list." ) - if self.is_s3 is True and len(self.vars.wanted) > 3: - warnings.warn( - "Loading more than 3 variables from an s3 object can be prohibitively slow" - "Approximate access time (using `.load()`) can exceed 6 minutes per data " - "variable." - ) - _confirm_proceed() - # Append the minimum variables needed for icepyx to merge the datasets # Skip products which do not contain required variables if self.product not in ["ATL14", "ATL15", "ATL23"]: @@ -822,17 +752,9 @@ def load(self): # In these situations, xarray recommends manually controlling the merge/concat process yourself. # While unlikely to be a broad issue, I've heard of multiple matching timestamps causing issues for combining multiple IS2 datasets. for file in self.filelist: - if file.startswith("s3"): - # If path is an s3 path create an s3fs filesystem to reference the file - # TODO would it be better to be able to generate an s3fs session from the Mixin? - s3 = earthaccess.get_s3fs_session(daac="NSIDC") - file = s3.open(file, "rb") - all_dss.append( self._build_single_file_dataset(file, groups_list) ) # wanted_groups, vgrp.keys())) - if isinstance(file, S3File): - file.close() if len(all_dss) == 1: return all_dss[0] diff --git a/icepyx/core/variables.py b/icepyx/core/variables.py index b611b861d..4dd5444fe 100644 --- a/icepyx/core/variables.py +++ b/icepyx/core/variables.py @@ -85,7 +85,6 @@ def __init__( # Set the product and version from either the input args or the file if path: self._path = val.check_s3bucket(path) - # Set up auth if self._path.startswith("s3"): auth = self.auth