fix s3url grabbing for .nc files (#507)
reformat some long-lines as well
JessicaS11 authored Feb 8, 2024
1 parent 7e6510c commit 826e936
Showing 2 changed files with 28 additions and 16 deletions.
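
The substantive change, shown in the granules.py diff below, is that cloud s3 URLs were previously kept only when they ended in ".h5", so products distributed as netCDF (e.g. ATL15) returned no URLs. A minimal standalone sketch of the corrected filtering follows, using the same str.endswith tuple form as the diff; the granule dictionaries here are illustrative, not real CMR records.

# Minimal sketch of the s3-URL filtering fixed by this commit.
# The granule/link structure is illustrative; real CMR records carry more fields.
grans = [
    {"links": [{"href": "s3://example-bucket/ATL03_example_granule.h5"}]},
    {"links": [{"href": "s3://example-bucket/ATL15_example_granule.nc"}]},
]

gran_s3urls = []
for gran in grans:
    try:
        for link in gran["links"]:
            # Previously only ".h5" was accepted; passing a tuple of suffixes
            # to str.endswith also keeps netCDF granules such as ATL15.
            if link["href"].startswith("s3") and link["href"].endswith((".h5", "nc")):
                gran_s3urls.append(link["href"])
    except KeyError:
        pass

print(gran_s3urls)  # both the .h5 and the .nc URL are retained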
41 changes: 25 additions & 16 deletions icepyx/core/granules.py
@@ -7,7 +7,6 @@
import numpy as np
import os
import pprint
import warnings
from xml.etree import ElementTree as ET
import zipfile

@@ -37,7 +36,8 @@ def info(grans):
# DevNote: could add flag to separate ascending and descending orbits based on ATL03 granule region
def gran_IDs(grans, ids=False, cycles=False, tracks=False, dates=False, cloud=False):
"""
Returns a list of granule information for each granule dictionary in the input list of granule dictionaries.
Returns a list of granule information for each granule dictionary
in the input list of granule dictionaries.
Granule info may be from a list of those available from NSIDC (for ordering/download)
or a list of granules present on the file system.
@@ -71,15 +71,17 @@ def gran_IDs(grans, ids=False, cycles=False, tracks=False, dates=False, cloud=False):
producer_granule_id = gran["producer_granule_id"]
gran_ids.append(producer_granule_id)

if cloud == True:
if cloud is True:
try:
for link in gran["links"]:
if link["href"].startswith("s3") and link["href"].endswith(".h5"):
if link["href"].startswith("s3") and link["href"].endswith(
(".h5", "nc")
):
gran_s3urls.append(link["href"])
except KeyError:
pass

if any([param == True for param in [cycles, tracks, dates]]):
if any([param is True for param in [cycles, tracks, dates]]):
# PRD: ICESat-2 product
# HEM: Sea Ice Hemisphere flag
# YY,MM,DD,HH,MN,SS: Year, Month, Day, Hour, Minute, Second
@@ -200,7 +202,8 @@ def get_avail(self, CMRparams, reqparams, cloud=False):
granule_search_url = "https://cmr.earthdata.nasa.gov/search/granules"

headers = {"Accept": "application/json", "Client-Id": "icepyx"}
# note we should also check for errors whenever we ping NSIDC-API - make a function to check for errors
# note we should also check for errors whenever we ping NSIDC-API -
# make a function to check for errors

params = apifmt.combine_params(
CMRparams,
@@ -251,7 +254,8 @@ def get_avail(self, CMRparams, reqparams, cloud=False):
len(self.avail) > 0
), "Your search returned no results; try different search parameters"

# DevNote: currently, default subsetting DOES NOT include variable subsetting, only spatial and temporal
# DevNote: currently, default subsetting DOES NOT include variable subsetting,
# only spatial and temporal
# DevGoal: add kwargs to allow subsetting and more control over request options.
def place_order(
self,
@@ -284,12 +288,15 @@ def place_order(
Progress information is automatically printed regardless of the value of verbose.
subset : boolean, default True
Apply subsetting to the data order from the NSIDC, returning only data that meets the
subset parameters. Spatial and temporal subsetting based on the input parameters happens
subset parameters.
Spatial and temporal subsetting based on the input parameters happens
by default when subset=True, but additional subsetting options are available.
Spatial subsetting returns all data that are within the area of interest (but not complete
granules. This eliminates false-positive granules returned by the metadata-level search)
Spatial subsetting returns all data that are within the area of interest
(but not complete granules.
This eliminates false-positive granules returned by the metadata-level search)
session : requests.session object
A session object authenticating the user to order data using their Earthdata login information.
A session object authenticating the user to order data using their
Earthdata login information.
The session object will automatically be passed from the query object if you
have successfully logged in there.
geom_filepath : string, default None
@@ -452,10 +459,10 @@ def place_order(
else:
print("Request failed.")

# DevGoal: save orderIDs more frequently than just at the end for large orders (e.g. for len(reqparams['page_num']) > 5 or 10 or something)
# DevGoal: save orderIDs more frequently than just at the end for large orders
# (e.g. for len(reqparams['page_num']) > 5 or 10 or something)
# Save orderIDs to file to avoid resubmitting order in case kernel breaks down.
# save orderIDs for every 5 orders when more than 10 orders are submitted.
# DevNote: These numbers are hard coded for now. Consider to allow user to set them in future?
if reqparams["page_num"] >= 10:
with open(order_fn, "w") as fid:
json.dump({"orderIDs": self.orderIDs}, fid)
@@ -483,8 +490,9 @@ def download(self, verbose, path, session=None, restart=False):
The session object will automatically be passed from the query object if you
have successfully logged in there.
restart : boolean, default False
Restart your download if it has been interrupted. If the kernel has been restarted, but you successfully
completed your order, you will need to re-initialize your query class object and log in to Earthdata
Restart your download if it has been interrupted.
If the kernel has been restarted, but you successfully
completed your order, you will need to re-initialize your query class object
and can then skip immediately to the download_granules method with restart=True.
Notes
@@ -508,7 +516,8 @@ def download(self, verbose, path, session=None, restart=False):
)
# DevGoal: make this a more robust check for an active session

# DevNote: this will replace any existing orderIDs with the saved list (could create confusion depending on whether download was interrupted or kernel restarted)
# DevNote: this will replace any existing orderIDs with the saved list
# (could create confusion depending on whether download was interrupted or kernel restarted)
order_fn = ".order_restart"
if os.path.exists(order_fn):
with open(order_fn, "r") as fid:
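
For context, the cloud branch changed above is reached from the user-facing query workflow shown (commented out) in the test file below. A hedged usage sketch for a netCDF product: the product, spatial extent, and date range are placeholders, and passing cloud=True through avail_granules is inferred from the gran_IDs keyword rather than documented in this diff.

import icepyx as ipx

# Placeholder query for ATL15, a gridded product distributed as .nc files,
# which previously returned no s3 URLs because of the ".h5"-only filter.
region_a = ipx.Query("ATL15", [-67, -72, -65, -70], ["2019-02-20", "2019-02-28"])

# Assumes avail_granules forwards cloud=True to gran_IDs, as the keyword suggests;
# with this commit the returned s3 URLs should include .nc granules as well.
gran_info = region_a.avail_granules(ids=True, cloud=True)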
3 changes: 3 additions & 0 deletions icepyx/tests/test_granules.py
@@ -29,6 +29,9 @@
# region_a = ipx.Query(short_name, spatial_extent, date_range)
# region_a.avail_granules(ids=True)

# add test that s3urls are gotten for ALL products (e.g. ATL15 was failing
# due to .nc extension instead of .h5)


# DevNote: clearly there's a better way that doesn't make the function so long...
# what is it?
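
The new comment suggests a regression test covering products with a .nc extension. One possible shape for such a test, run directly against gran_IDs with hand-built granule dictionaries so no network access is needed; the assumption that gran_IDs returns the IDs and the s3 URLs as two lists when ids=True and cloud=True is inferred from the appends in the diff above, not confirmed by it.

from icepyx.core.granules import gran_IDs

def test_s3urls_found_for_nc_products():
    # Hand-built granule dicts mimicking CMR results for an .h5 and an .nc product.
    grans = [
        {
            "producer_granule_id": "ATL03_example_granule.h5",
            "links": [{"href": "s3://example-bucket/ATL03_example_granule.h5"}],
        },
        {
            "producer_granule_id": "ATL15_example_granule.nc",
            "links": [{"href": "s3://example-bucket/ATL15_example_granule.nc"}],
        },
    ]
    # Assumed return shape: [granule IDs, s3 URLs] when ids=True and cloud=True.
    ids, s3urls = gran_IDs(grans, ids=True, cloud=True)
    assert any(url.endswith(".nc") for url in s3urls)
    assert any(url.endswith(".h5") for url in s3urls)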
