
Commit

current status
rwegener2 committed Oct 23, 2023
1 parent b4d59d6 commit 3f4bfa7
Showing 3 changed files with 71 additions and 45 deletions.
24 changes: 24 additions & 0 deletions icepyx/core/is2ref.py
@@ -1,3 +1,4 @@
import h5py
import json
import numpy as np
import requests
@@ -333,3 +334,26 @@ def latest_version(product):
    return max(
        [entry["version_id"] for entry in _about_product["feed"]["entry"]]
    )

def extract_product(filepath):
    """
    Read the product type from the metadata of the file. Return the product as a string.
    """
    with h5py.File(filepath, 'r') as f:
        try:
            product = f.attrs['short_name'].decode()
            product = _validate_product(product)
        except KeyError:
            raise KeyError('Unable to parse the product name from file metadata')
    return product

def extract_version(filepath):
    """
    Read the version from the metadata of the file. Return the version as a string.
    """
    with h5py.File(filepath, 'r') as f:
        try:
            version = f['METADATA']['DatasetIdentification'].attrs['VersionID'].decode()
        except KeyError:
            raise KeyError('Unable to parse the version from file metadata')
    return version
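
For illustration, a minimal usage sketch of these two helpers against a local ICESat-2 granule; the file name below is hypothetical, and the only assumption is that the functions are importable from icepyx.core.is2ref as added above.

# Hypothetical usage sketch (not part of this commit).
from icepyx.core import is2ref

filepath = "./ATL06_20200702014158_01020810_006_01.h5"  # example granule name, assumed

product = is2ref.extract_product(filepath)  # e.g. "ATL06"
version = is2ref.extract_version(filepath)  # e.g. "006"
print(product, version)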
33 changes: 17 additions & 16 deletions icepyx/core/read.py
@@ -379,7 +379,7 @@ def __init__(
        # Create a dictionary of the products as read from the metadata
        product_dict = {}
        for file_ in self._filelist:
            product_dict[file_] = self._extract_product(file_)
            product_dict[file_] = is2ref.extract_product(file_)

        # Raise warnings or errors for multiple products or products not matching the user-specified product
        all_products = list(set(product_dict.values()))
@@ -472,21 +472,6 @@ def product(self):

    # ----------------------------------------------------------------------
    # Methods

    @staticmethod
    def _extract_product(filepath):
        """
        Read the product type from the metadata of the file. Return the product as a string.
        """
        with h5py.File(filepath, 'r') as f:
            try:
                product = f.attrs['short_name'].decode()
                product = is2ref._validate_product(product)
            # TODO test that this is the proper error
            except KeyError:
                raise 'Unable to parse the product name from file metadata'
        return product

    @staticmethod
    def _check_source_for_pattern(source, filename_pattern):
        """
@@ -735,6 +720,22 @@ def load(self):
        # so to get a combined dataset, we need to keep track of spots under the hood, open each group, and then combine them into one xarray where the spots are IDed somehow (or only the strong ones are returned)
        # this means we need to get/track from each dataset we open some of the metadata, which we include as mandatory variables when constructing the wanted list

        # Append the minimum variables needed for icepyx to merge the datasets
        var_list = [
            "sc_orient",
            "atlas_sdp_gps_epoch",
            "cycle_number",
            "rgt",
            "data_start_utc",
            "data_end_utc",
        ]

        # Adjust the variable list for individual products
        if self.product == "ATL11":
            var_list.remove("sc_orient")

        self._read_vars.append(defaults=False, var_list=var_list)
        print(self._read_vars)
        try:
            groups_list = list_of_dict_vals(self._read_vars.wanted)
        except AttributeError:
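As a standalone illustration of the per-product adjustment above, a small sketch of the same logic; the helper name is invented for the example (not icepyx API), and the only assumption is the one implied by the diff, namely that sc_orient is dropped for ATL11.

# Illustrative sketch (not icepyx API): minimum variables needed to merge datasets.
def minimum_merge_vars(product):
    var_list = [
        "sc_orient",
        "atlas_sdp_gps_epoch",
        "cycle_number",
        "rgt",
        "data_start_utc",
        "data_end_utc",
    ]
    # the diff removes sc_orient for ATL11, presumably because ATL11 granules lack it
    if product == "ATL11":
        var_list.remove("sc_orient")
    return var_list

print(minimum_merge_vars("ATL06"))  # full list
print(minimum_merge_vars("ATL11"))  # list without sc_orient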
59 changes: 30 additions & 29 deletions icepyx/core/variables.py
@@ -83,6 +83,9 @@ def __init__(
        # Set the product and version from either the input args or the file
        if path:
            self.path = path
            self.product = is2ref.extract_product(self.path)
            # TODO what is the best way to fill in the version here? --> read it from the file
            self.version = is2ref.extract_version(self.path)
        elif product:
            # Check for valid product string
            self.product = is2ref._validate_product(product)
@@ -475,42 +478,40 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=None):
        self._check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list)

        # add the mandatory variables to the data object
        # TODO QUESTION - why is this distinction made? How can we handle it wihtout vartype?
        if self._vartype == "order":
            nec_varlist = [
                "sc_orient",
                "sc_orient_time",
                "atlas_sdp_gps_epoch",
                "data_start_utc",
                "data_end_utc",
                "granule_start_utc",
                "granule_end_utc",
                "start_delta_time",
                "end_delta_time",
            ]
        elif self._vartype == "file":
            nec_varlist = [
                "sc_orient",
                "atlas_sdp_gps_epoch",
                "cycle_number",
                "rgt",
                "data_start_utc",
                "data_end_utc",
            ]

        # Adjust the nec_varlist for individual products
        if self.product == "ATL11":
            nec_varlist.remove("sc_orient")
        # TODO QUESTION - why is this distinction made? How can we handle it without vartype?
        # if self._vartype == "order":
        #     nec_varlist = [
        #         "sc_orient",
        #         "sc_orient_time",
        #         "atlas_sdp_gps_epoch",
        #         "data_start_utc",
        #         "data_end_utc",
        #         "granule_start_utc",
        #         "granule_end_utc",
        #         "start_delta_time",
        #         "end_delta_time",
        #     ]
        # elif self._vartype == "file":
        #     nec_varlist = [
        #         "sc_orient",
        #         "atlas_sdp_gps_epoch",
        #         "cycle_number",
        #         "rgt",
        #         "data_start_utc",
        #         "data_end_utc",
        #     ]

        try:
            self._check_valid_lists(vgrp, allpaths, var_list=nec_varlist)
            self._check_valid_lists(vgrp, allpaths)
        except ValueError:
            # Assume gridded product since user input lists were previously validated
            nec_varlist = []

        # TODO there is a lot of logic next that handles the required variables. Does it make
        # sense to even have required variables anymore, if we are handling those via query/read?
        if not hasattr(self, "wanted") or self.wanted == None:
            for varid in nec_varlist:
                req_vars[varid] = vgrp[varid]
            # for varid in nec_varlist:
            #     req_vars[varid] = vgrp[varid]
            self.wanted = req_vars

        # DEVGOAL: add a secondary var list to include uncertainty/error information for lower level data if specific data variables have been specified...
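With this change, a Variables object built from a local file should pick up both product and version from the file itself. A hedged sketch of that call, assuming the class is icepyx.core.variables.Variables and that it accepts the path keyword shown in the diff; the granule name is hypothetical.

# Hypothetical usage sketch: product and version are read from the file metadata.
from icepyx.core.variables import Variables

v = Variables(path="./ATL06_20200702014158_01020810_006_01.h5")  # example granule, assumed
print(v.product, v.version)  # e.g. "ATL06" and the version string stored in the file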
