From 6aac4408a3df8b9151539b9bad5a9428a627e8fc Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Mon, 22 Jan 2024 16:14:53 -0500 Subject: [PATCH 1/6] fix: isolate data pull --- src/icesat2_tracks/ICEsat2_SI_tools/io.py | 795 ++++++++++-------- .../analysis_db/B01_SL_load_single_file.py | 39 +- 2 files changed, 468 insertions(+), 366 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/io.py index 521e305c..f3349744 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/io.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/io.py @@ -1,41 +1,50 @@ +from sliderule import icesat2 +from icesat2_tracks.ICEsat2_SI_tools import sliderule_converter_tools as sct def init_from_input(arguments): - if (len(arguments) <= 1) | ('-f' in set(arguments) ) : + """ + Initializes the variables track_name, batch_key, and test_flag based on the input arguments. + + Parameters: + arguments (list): A list of input arguments. - track_name='20190605061807_10380310_004_01' - batch_key='SH_batch01' + Returns: + tuple: A tuple containing the values of track_name, batch_key, and test_flag. + """ + + if (len(arguments) <= 1) | ("-f" in set(arguments)): + track_name = "20190605061807_10380310_004_01" + batch_key = "SH_batch01" test_flag = True - print('use standard values') + print("use standard values") else: + track_name = arguments[1] + batch_key = arguments[2] + # $(hemisphere) $(coords) $(config) - track_name=arguments[1] - batch_key =arguments[2] - #$(hemisphere) $(coords) $(config) - - print('read vars from file: ' +str(arguments[1]) ) + print("read vars from file: " + str(arguments[1])) - if (len(arguments) >= 4): - if arguments[3] == 'True': + if len(arguments) >= 4: + if arguments[3] == "True": test_flag = True - elif arguments[3] == 'False': + elif arguments[3] == "False": test_flag = False else: - test_flag= arguments[3] + test_flag = arguments[3] - print('test_flag found, test_flag= '+str(test_flag) ) + print("test_flag found, test_flag= " + str(test_flag)) else: - test_flag=False + test_flag = False print(track_name) - - print('----- batch ='+ batch_key) - print('----- test_flag: ' + str(test_flag)) + print("----- batch =" + batch_key) + print("----- test_flag: " + str(test_flag)) return track_name, batch_key, test_flag -def init_data(ID_name, batch_key, ID_flag, ID_root, prefix ='A01b_ID'): +def init_data(ID_name, batch_key, ID_flag, ID_root, prefix="A01b_ID"): """ Takes inputs and retrieves the ID, track_names that can be loaded, hemis, batch inputs: are the outputs from init_from_input, specifically @@ -49,103 +58,146 @@ def init_data(ID_name, batch_key, ID_flag, ID_root, prefix ='A01b_ID'): """ print(ID_name, batch_key, ID_flag) - hemis, batch = batch_key.split('_') + hemis, batch = batch_key.split("_") if ID_flag: - ID_path = ID_root +'/'+batch_key+'/'+prefix+'/' - ID = json_load( prefix +'_'+ID_name, ID_path ) - track_names = ID['tracks']['ATL03'] + ID_path = ID_root + "/" + batch_key + "/" + prefix + "/" + ID = json_load(prefix + "_" + ID_name, ID_path) + track_names = ID["tracks"]["ATL03"] else: - track_names = ['ATL03_'+ID_name] - ID = ID_name + track_names = ["ATL03_" + ID_name] + ID = ID_name return ID, track_names, hemis, batch + def ID_to_str(ID_name): from datetime import datetime - IDs = ID_name.split('_') - date = datetime.strptime(IDs[1], '%Y%m%d').strftime('%Y-%m-%d')#.strftime('%m/%d/%Y') - date - return IDs[0] +' ' +date +' granule: ' + IDs[2] + IDs = ID_name.split("_") + date = datetime.strptime(IDs[1], "%Y%m%d").strftime("%Y-%m-%d") + return 
IDs[0] + " " + date + " granule: " + IDs[2] + + +def get_gdf(ATL03_track_name, params_yapc, maximum_height): + print("Retrieving data from sliderule ...") + gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) + + if gdf.empty: + raise Exception("Empty Geodataframe. No data could be retrieved.") + + print("Initial data retrieved") + gdf = sct.correct_and_remove_height(gdf, maximum_height) + return gdf + class case_ID: """docstring for case_ID""" + def __init__(self, track_name): import re - track_name_pattern = r'(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?' - case_ID_pattern = r'(\d{4})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})' - track_name_rx = re.compile(track_name_pattern) - self.hemis,self.YY,self.MM,self.DD,self.HH,self.MN,self.SS,self.TRK,self.CYC,self.GRN,self.RL,self.VRS = track_name_rx.findall(track_name).pop() + track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" - if self.hemis == '01': - self.hemis = 'NH' - elif self.hemis == '02': - self.hemis = 'SH' + track_name_rx = re.compile(track_name_pattern) + ( + self.hemis, + self.YY, + self.MM, + self.DD, + self.HH, + self.MN, + self.SS, + self.TRK, + self.CYC, + self.GRN, + self.RL, + self.VRS, + ) = track_name_rx.findall(track_name).pop() + + if self.hemis == "01": + self.hemis = "NH" + elif self.hemis == "02": + self.hemis = "SH" else: self.hemis = self.hemis - #self.hemis = hemis + # self.hemis = hemis self.set() self.track_name_init = track_name def set(self): - block1 = (self.YY,self.MM,self.DD) - block2 = (self.TRK,self.CYC,self.GRN) + block1 = (self.YY, self.MM, self.DD) + block2 = (self.TRK, self.CYC, self.GRN) - self.ID = self.hemis+'_'+''.join(block1) +'_'+ ''.join(block2) + self.ID = self.hemis + "_" + "".join(block1) + "_" + "".join(block2) return self.ID def get_granule(self): - return ''.join((self.TRK,self.CYC,self.GRN)) + return "".join((self.TRK, self.CYC, self.GRN)) def set_dummy(self): - block1 = (self.YY,self.MM,self.DD) - block2 = (self.TRK,self.CYC,self.GRN) + block1 = (self.YY, self.MM, self.DD) + block2 = (self.TRK, self.CYC, self.GRN) - self.ID_dummy = ''.join(block1) +'_'+ ''.join(block2) + self.ID_dummy = "".join(block1) + "_" + "".join(block2) return self.ID_dummy def set_ATL03_trackname(self): - - block1 = (self.YY,self.MM,self.DD) - block1b = (self.HH,self.MN,self.SS) - block2 = (self.TRK,self.CYC,self.GRN) - if self.RL is '': + block1 = (self.YY, self.MM, self.DD) + block1b = (self.HH, self.MN, self.SS) + block2 = (self.TRK, self.CYC, self.GRN) + if self.RL is "": raise ValueError("RL not set") - if self.VRS is '': + if self.VRS is "": raise ValueError("VRS not set") - block3 = (self.RL,self.VRS) + block3 = (self.RL, self.VRS) - self.ID_ATL03 = ''.join(block1) +''.join(block1b) +'_'+ ''.join(block2) +'_'+ '_'.join(block3) + self.ID_ATL03 = ( + "".join(block1) + + "".join(block1b) + + "_" + + "".join(block2) + + "_" + + "_".join(block3) + ) return self.ID_ATL03 def set_ATL10_trackname(self): - - block1 = (self.YY,self.MM,self.DD) - block1b = (self.HH,self.MN,self.SS) - block2 = (self.TRK,self.CYC, '01') # granule is alwasy '01' for ATL10 - if self.RL is '': + block1 = (self.YY, self.MM, self.DD) + block1b = (self.HH, self.MN, self.SS) + block2 = (self.TRK, self.CYC, "01") # granule is alwasy '01' for ATL10 + if self.RL is "": raise ValueError("RL not set") - if self.VRS is '': + if self.VRS is "": raise ValueError("VRS not set") - block3 = (self.RL,self.VRS) + 
block3 = (self.RL, self.VRS) - if self.hemis == 'NH': - hemis = '01' - elif self.hemis == 'SH': - hemis = '02' + if self.hemis == "NH": + hemis = "01" + elif self.hemis == "SH": + hemis = "02" else: hemis = self.hemis - self.ID_ATL10 = hemis+'_'+''.join(block1) +''.join(block1b) +'_'+ ''.join(block2) +'_'+ '_'.join(block3) + self.ID_ATL10 = ( + hemis + + "_" + + "".join(block1) + + "".join(block1b) + + "_" + + "".join(block2) + + "_" + + "_".join(block3) + ) return self.ID_ATL10 -def nsidc_icesat2_get_associated_file(file_list, product, build=True, username=None, password=None): +def nsidc_icesat2_get_associated_file( + file_list, product, build=True, username=None, password=None +): """ THis method returns assocociated files names and paths for files given in file_list for the "product" ICEsat2 product @@ -163,105 +215,104 @@ def nsidc_icesat2_get_associated_file(file_list, product, build=True, username=N import posixpath import os import icesat2_toolkit.utilities - AUXILIARY=False - #product='ATL03' - DIRECTORY= None - FLATTEN=False - TIMEOUT=120 - MODE=0o775 - #file_list = ['ATL07-01_20210301023054_10251001_005_01'] + + AUXILIARY = False + # product='ATL03' + DIRECTORY = None + FLATTEN = False + TIMEOUT = 120 + MODE = 0o775 + # file_list = ['ATL07-01_20210301023054_10251001_005_01'] if build and not (username or password): - urs = 'urs.earthdata.nasa.gov' - username,login,password = netrc.netrc().authenticators(urs) - #-- build urllib2 opener and check credentials + urs = "urs.earthdata.nasa.gov" + username, login, password = netrc.netrc().authenticators(urs) + # -- build urllib2 opener and check credentials if build: - #-- build urllib2 opener with credentials + # -- build urllib2 opener with credentials icesat2_toolkit.utilities.build_opener(username, password) - #-- check credentials + # -- check credentials icesat2_toolkit.utilities.check_credentials() parser = lxml.etree.HTMLParser() - #-- remote https server for ICESat-2 Data - HOST = 'https://n5eil01u.ecs.nsidc.org' - #-- regular expression operator for extracting information from files - rx = re.compile(r'(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})' - r'(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})') - #-- regular expression pattern for finding specific files - regex_suffix = '(.*?)$' if AUXILIARY else '(h5)$' - remote_regex_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})' - r'(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}') - - # rx = re.compile(r'(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})' - # r'(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$') - # #-- regular expression pattern for finding specific files - # regex_suffix = '(.*?)$' if AUXILIARY else '(h5)$' - # remote_regex_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})' - # r'(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}') - - #-- build list of remote files, remote modification times and local files + # -- remote https server for ICESat-2 Data + HOST = "https://n5eil01u.ecs.nsidc.org" + # -- regular expression operator for extracting information from files + rx = re.compile( + r"(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})" + r"(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})" + ) + # -- regular expression pattern for finding specific files + regex_suffix = "(.*?)$" if AUXILIARY else "(h5)$" + remote_regex_pattern = ( + r"{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})" + 
r"(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}" + ) + + # -- build list of remote files, remote modification times and local files original_files = [] remote_files = [] remote_mtimes = [] local_files = [] - remote_names =[] + remote_names = [] for input_file in file_list: - #print(input_file) - #-- extract parameters from ICESat-2 ATLAS HDF5 file name - SUB,PRD,HEM,YY,MM,DD,HH,MN,SS,TRK,CYC,GRN,RL,VRS = \ - rx.findall(input_file).pop() - #-- get directories from remote directory - product_directory = '{0}.{1}'.format(product,RL) - sd = '{0}.{1}.{2}'.format(YY,MM,DD) - PATH = [HOST,'ATLAS',product_directory,sd] - #-- local and remote data directories - remote_dir=posixpath.join(*PATH) - temp=os.path.dirname(input_file) if (DIRECTORY is None) else DIRECTORY - local_dir=os.path.expanduser(temp) if FLATTEN else os.path.join(temp,sd) - #-- create output directory if not currently existing + # print(input_file) + # -- extract parameters from ICESat-2 ATLAS HDF5 file name + SUB, PRD, HEM, YY, MM, DD, HH, MN, SS, TRK, CYC, GRN, RL, VRS = rx.findall( + input_file + ).pop() + # -- get directories from remote directory + product_directory = "{0}.{1}".format(product, RL) + sd = "{0}.{1}.{2}".format(YY, MM, DD) + PATH = [HOST, "ATLAS", product_directory, sd] + # -- local and remote data directories + remote_dir = posixpath.join(*PATH) + temp = os.path.dirname(input_file) if (DIRECTORY is None) else DIRECTORY + local_dir = os.path.expanduser(temp) if FLATTEN else os.path.join(temp, sd) + # -- create output directory if not currently existing # if not os.access(local_dir, os.F_OK): # os.makedirs(local_dir, MODE) - #-- compile regular expression operator for file parameters - args = (product,TRK,CYC,GRN,RL,regex_suffix) + # -- compile regular expression operator for file parameters + args = (product, TRK, CYC, GRN, RL, regex_suffix) R1 = re.compile(remote_regex_pattern.format(*args), re.VERBOSE) - #-- find associated ICESat-2 data file - #-- find matching files (for granule, release, version, track) - colnames,collastmod,colerror=icesat2_toolkit.utilities.nsidc_list(PATH, - build=False, - timeout=TIMEOUT, - parser=parser, - pattern=R1, - sort=True) + # -- find associated ICESat-2 data file + # -- find matching files (for granule, release, version, track) + colnames, collastmod, colerror = icesat2_toolkit.utilities.nsidc_list( + PATH, build=False, timeout=TIMEOUT, parser=parser, pattern=R1, sort=True + ) print(colnames) - #-- print if file was not found + # -- print if file was not found if not colnames: print(colerror) continue - #-- add to lists - for colname,remote_mtime in zip(colnames,collastmod): - #-- save original file to list (expands if getting auxiliary files) + # -- add to lists + for colname, remote_mtime in zip(colnames, collastmod): + # -- save original file to list (expands if getting auxiliary files) original_files.append(input_file) - #-- remote and local versions of the file - remote_files.append(posixpath.join(remote_dir,colname)) - local_files.append(os.path.join(local_dir,colname)) + # -- remote and local versions of the file + remote_files.append(posixpath.join(remote_dir, colname)) + local_files.append(os.path.join(local_dir, colname)) remote_mtimes.append(remote_mtime) remote_names.append(colname) - return original_files, remote_files, remote_names #product_directory, sd, + return original_files, remote_files, remote_names # product_directory, sd, + def json_load(name, path, verbose=False): import json import os - full_name= (os.path.join(path,name+ 
'.json')) - with open(full_name, 'r') as ifile: - data=json.load(ifile) + full_name = os.path.join(path, name + ".json") + + with open(full_name, "r") as ifile: + data = json.load(ifile) if verbose: - print('loaded from: ',full_name) + print("loaded from: ", full_name) return data -def ATL03_download(username,password, dpath, product_directory, sd, file_name): + +def ATL03_download(username, password, dpath, product_directory, sd, file_name): """ inputs: username: username for https://urs.earthdata.nasa.gov @@ -272,102 +323,114 @@ def ATL03_download(username,password, dpath, product_directory, sd, file_name): file_name 'ATL03_20190301010737_09560204_005_01.h5' - filename in subdirectory """ import icesat2_toolkit.utilities - from icesat2_toolkit.read_ICESat2_ATL03 import read_HDF5_ATL03 - HOST = ['https://n5eil01u.ecs.nsidc.org','ATLAS',product_directory,sd, file_name] - # HOST = ['https://n5eil01u.ecs.nsidc.org','ATLAS','ATL03.003','2018.10.14', - # 'ATL03_20181014000347_02350101_003_01.h5'] - print('download to:', dpath+'/'+HOST[-1]) - buffer,error=icesat2_toolkit.utilities.from_nsidc(HOST,username=username, - password=password,local=dpath+'/'+HOST[-1],verbose=True) - #-- raise exception if download error + + HOST = ["https://n5eil01u.ecs.nsidc.org", "ATLAS", product_directory, sd, file_name] + print("download to:", dpath + "/" + HOST[-1]) + buffer, error = icesat2_toolkit.utilities.from_nsidc( + HOST, + username=username, + password=password, + local=dpath + "/" + HOST[-1], + verbose=True, + ) + # -- raise exception if download error if not buffer: raise Exception(error) -def save_pandas_table(table_dict, name , save_path): + +def save_pandas_table(table_dict, name, save_path): import os + if not os.path.exists(save_path): os.makedirs(save_path) import warnings from pandas import HDFStore from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings('ignore',category=PerformanceWarning) - with HDFStore(save_path+'/'+name+'.h5') as store: - for name,table in table_dict.items(): - store[name]=table + warnings.filterwarnings("ignore", category=PerformanceWarning) + + with HDFStore(save_path + "/" + name + ".h5") as store: + for name, table in table_dict.items(): + store[name] = table + -def load_pandas_table_dict(name , save_path): +def load_pandas_table_dict(name, save_path): import warnings from pandas import HDFStore from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings('ignore',category=PerformanceWarning) - return_dict=dict() - with HDFStore(save_path+'/'+name+'.h5') as store: - #print(store) - #print(store.keys()) + warnings.filterwarnings("ignore", category=PerformanceWarning) + + return_dict = dict() + with HDFStore(save_path + "/" + name + ".h5") as store: for k in store.keys(): - return_dict[k[1:]]=store.get(k) + return_dict[k[1:]] = store.get(k) return return_dict -def get_beam_hdf_store(ATL03_k): +def get_beam_hdf_store(ATL03_k): import pandas as pd - DD = pd.DataFrame()#columns = ATL03.keys()) + + DD = pd.DataFrame() # columns = ATL03.keys()) for ikey in ATL03_k.keys(): DD[ikey] = ATL03_k[ikey] return DD -def get_beam_var_hdf_store(ATL03_k, ikey): +def get_beam_var_hdf_store(ATL03_k, ikey): import pandas as pd - DD = pd.DataFrame()#columns = ATL03.keys()) + + DD = pd.DataFrame() # columns = ATL03.keys()) DD[ikey] = ATL03_k[ikey] return DD -def write_track_to_HDF5(data_dict, name, path, verbose=False, mode='w'): +def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): import os import h5py - mode = 'w' if mode is None else 
mode + + mode = "w" if mode is None else mode if not os.path.exists(path): os.makedirs(path) - full_name= (os.path.join(path,name+ '.h5')) + full_name = os.path.join(path, name + ".h5") store = h5py.File(full_name, mode) for k in data_dict.keys(): store1 = store.create_group(k) for kk, I in list(data_dict[k].items()): - store1[kk]=I - #store1.close() + store1[kk] = I + # store1.close() store.close() if verbose: - print('saved at: ' +full_name) + print("saved at: " + full_name) def get_time_for_track(delta_time, atlas_epoch): "returns pandas dataframe" import pandas as pd - import convert_GPS_time as cGPS + import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS + # Conversion of delta_time to a calendar date temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) - year = temp['year'][:].astype('int') - month = temp['month'][:].astype('int') - day = temp['day'][:].astype('int') - hour = temp['hour'][:].astype('int') - minute = temp['minute'][:].astype('int') - second = temp['second'][:].astype('int') + year = temp["year"][:].astype("int") + month = temp["month"][:].astype("int") + day = temp["day"][:].astype("int") + hour = temp["hour"][:].astype("int") + second = temp["second"][:].astype("int") + + return pd.DataFrame( + {"year": year, "month": month, "day": day, "hour": hour, "second": second} + ) - return pd.DataFrame({'year':year, 'month':month, 'day':day, 'hour':hour, 'second':second}) -def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): +def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): """ returns 'beam' from fileT as pandas table. fillT path of file @@ -377,78 +440,66 @@ def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): """ # Add in a proper description of the function here import h5py - import convert_GPS_time as cGPS import pandas as pd + # Open the file - ATL03 = h5py.File(fileT, 'r') - lons = ATL03[beam+'/heights/lon_ph'][:] - lats = ATL03[beam+'/heights/lat_ph'][:] + ATL03 = h5py.File(fileT, "r") + lons = ATL03[beam + "/heights/lon_ph"][:] + lats = ATL03[beam + "/heights/lat_ph"][:] # Along track distance from equator i think. - along_track_distance=ATL03[beam+'/heights/dist_ph_along'][:] - across_track_distance=ATL03[beam+'/heights/dist_ph_across'][:] - #dem_h = ATL03[beam+'/geophys_corr/dem_h'][:] - #delta_time_dem_h = ATL03[beam+'/geophys_corr/delta_time'][:] - segment_dist_x=ATL03[beam+'/geolocation/segment_dist_x'][:] - segment_length=ATL03[beam+'/geolocation/segment_length'][:] - segment_id = ATL03[beam+'/geolocation/segment_id'][:] + along_track_distance = ATL03[beam + "/heights/dist_ph_along"][:] + across_track_distance = ATL03[beam + "/heights/dist_ph_across"][:] + segment_dist_x = ATL03[beam + "/geolocation/segment_dist_x"][:] + segment_length = ATL03[beam + "/geolocation/segment_length"][:] + segment_id = ATL03[beam + "/geolocation/segment_id"][:] - delta_time_geolocation = ATL03[beam+'/geolocation/delta_time'][:] - reference_photon_index= ATL03[beam+'/geolocation/reference_photon_index'][:] - ph_index_beg= ATL03[beam+'/geolocation/ph_index_beg'][:] + delta_time_geolocation = ATL03[beam + "/geolocation/delta_time"][:] + reference_photon_index = ATL03[beam + "/geolocation/reference_photon_index"][:] + ph_index_beg = ATL03[beam + "/geolocation/ph_index_beg"][:] - - ph_id_count = ATL03[beam+'/heights/ph_id_count'][:] + ph_id_count = ATL03[beam + "/heights/ph_id_count"][:] # Nathan says it's the number of seconds since the GPS epoch on midnight Jan. 
6, 1980 - delta_time = ATL03[beam+'/heights/delta_time'][:] - #podppd_flag=ATL03[beam+'/geolocation/podppd_flag'][:] - - # #Add this value to delta time parameters to compute full gps_seconds - atlas_epoch = ATL03['/ancillary_data/atlas_sdp_gps_epoch'][:] - - # Conversion of delta_time to a calendar date - temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) - - # Express delta_time relative to start time of granule - #delta_time_granule=delta_time-delta_time[0] - - # year = temp['year'][:].astype('int') - # month = temp['month'][:].astype('int') - # day = temp['day'][:].astype('int') - # hour = temp['hour'][:].astype('int') - # minute = temp['minute'][:].astype('int') - # second = temp['second'][:].astype('int') + delta_time = ATL03[beam + "/heights/delta_time"][:] # Primary variables of interest # Photon height - heights=ATL03[beam+'/heights/h_ph'][:] - #print(heights.shape) + heights = ATL03[beam + "/heights/h_ph"][:] + # print(heights.shape) # Flag for signal confidence # column index: 0=Land; 1=Ocean; 2=SeaIce; 3=LandIce; 4=InlandWater # values: - #-- -1: Events not associated with a specific surface type - #-- 0: noise - #-- 1: buffer but algorithm classifies as background - #-- 2: low - #-- 3: medium - #-- 4: high - - mask_ocean = ATL03[beam+'/heights/signal_conf_ph'][:, 1] > 2 # ocean points medium or high quality - mask_seaice = ATL03[beam+'/heights/signal_conf_ph'][:, 2] > 2 # sea ice points medium or high quality - mask_total = (mask_seaice | mask_ocean) - - if sum(~mask_total) == (ATL03[beam+'/heights/signal_conf_ph'][:, 1]).size: - print('zero photons, lower photon quality to 2 or higher') - mask_ocean = ATL03[beam+'/heights/signal_conf_ph'][:, 1] > 1 # ocean points medium or high quality - mask_seaice = ATL03[beam+'/heights/signal_conf_ph'][:, 2] > 1 # sea ice points medium or high quality - mask_total = (mask_seaice | mask_ocean) - - signal_confidence = ATL03[beam+'/heights/signal_conf_ph'][:, 1:3].max(1) - #print(signal_confidence.shape) - - #return signal_confidence + # -- -1: Events not associated with a specific surface type + # -- 0: noise + # -- 1: buffer but algorithm classifies as background + # -- 2: low + # -- 3: medium + # -- 4: high + + mask_ocean = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 2 + ) # ocean points medium or high quality + mask_seaice = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 2 + ) # sea ice points medium or high quality + mask_total = mask_seaice | mask_ocean + + if sum(~mask_total) == (ATL03[beam + "/heights/signal_conf_ph"][:, 1]).size: + print("zero photons, lower photon quality to 2 or higher") + mask_ocean = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 1 + ) # ocean points medium or high quality + mask_seaice = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 1 + ) # sea ice points medium or high quality + mask_total = mask_seaice | mask_ocean + + signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1) + # print(signal_confidence.shape) + + # return signal_confidence # Add photon rate and background rate to the reader here ATL03.close() @@ -458,26 +509,45 @@ def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): return along_track_dist, elev else: - dF = pd.DataFrame({'heights':heights, 'lons':lons, 'lats':lats, 'signal_confidence':signal_confidence, 'mask_seaice':mask_seaice, - 'delta_time':delta_time, 'along_track_distance':along_track_distance, #'delta_time_granule':delta_time_granule, - 'across_track_distance':across_track_distance,'ph_id_count':ph_id_count})#, - 
#'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) - - dF_seg = pd.DataFrame({'delta_time':delta_time_geolocation, 'segment_dist_x':segment_dist_x, 'segment_length':segment_length, 'segment_id':segment_id, - 'reference_photon_index':reference_photon_index, 'ph_index_beg':ph_index_beg}) + dF = pd.DataFrame( + { + "heights": heights, + "lons": lons, + "lats": lats, + "signal_confidence": signal_confidence, + "mask_seaice": mask_seaice, + "delta_time": delta_time, + "along_track_distance": along_track_distance, #'delta_time_granule':delta_time_granule, + "across_track_distance": across_track_distance, + "ph_id_count": ph_id_count, + } + ) # , + #'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) + + dF_seg = pd.DataFrame( + { + "delta_time": delta_time_geolocation, + "segment_dist_x": segment_dist_x, + "segment_length": segment_length, + "segment_id": segment_id, + "reference_photon_index": reference_photon_index, + "ph_index_beg": ph_index_beg, + } + ) # Filter out high elevation values - print('seg_dist shape ', segment_dist_x.shape) - print('df shape ',dF.shape) + print("seg_dist shape ", segment_dist_x.shape) + print("df shape ", dF.shape) dF = dF[mask_total] - #dF_seg = dF_seg[mask_total] - #print('df[mask] shape ',dF.shape) + # dF_seg = dF_seg[mask_total] + # print('df[mask] shape ',dF.shape) # Reset row indexing - #dF=dF#.reset_index(drop=True) + # dF=dF#.reset_index(drop=True) return dF, dF_seg -def getATL03_height_correction(fileT, beam='gt1r'): + +def getATL03_height_correction(fileT, beam="gt1r"): """ This method returns relevant data for wave estimates from ALT 07 tracks. returns: Pandas data frame @@ -486,25 +556,27 @@ def getATL03_height_correction(fileT, beam='gt1r'): import h5py import pandas as pd + # Open the file - ATL03 = h5py.File(fileT, 'r') + ATL03 = h5py.File(fileT, "r") ### bulk positions and statistics vars_bulk = [ - 'delta_time', # referenc time since equator crossing - 'dem_h', # best giod approxiamtion - ] + "delta_time", # referenc time since equator crossing + "dem_h", # best giod approxiamtion + ] - D_bulk= dict() + D_bulk = dict() for var in vars_bulk: - D_bulk[var] = ATL03[beam+'/geophys_corr/'+var][:] + D_bulk[var] = ATL03[beam + "/geophys_corr/" + var][:] dF_bulk = pd.DataFrame(D_bulk) ATL03.close() return dF_bulk -def getATL07_beam(fileT, beam='gt1r', maxElev=1e6): + +def getATL07_beam(fileT, beam="gt1r", maxElev=1e6): """ This method returns relevant data for wave estimates from ALT 07 tracks. returns: Pandas data frame @@ -513,81 +585,82 @@ def getATL07_beam(fileT, beam='gt1r', maxElev=1e6): import h5py import pandas as pd + # Open the file - ATL07 = h5py.File(fileT, 'r') + ATL07 = h5py.File(fileT, "r") ### bulk positions and statistics vars_bulk = [ - 'longitude', - 'latitude', - 'height_segment_id',# Height segment ID (10 km segments) - 'seg_dist_x' # Along track distance from the equator crossing to the segment center. - ] + "longitude", + "latitude", + "height_segment_id", # Height segment ID (10 km segments) + "seg_dist_x", # Along track distance from the equator crossing to the segment center. + ] - D_bulk= dict() + D_bulk = dict() for var in vars_bulk: - D_bulk[var] = ATL07[beam+'/sea_ice_segments/'+var] + D_bulk[var] = ATL07[beam + "/sea_ice_segments/" + var] dF_bulk = pd.DataFrame(D_bulk) # Nathan says it's the number of seconds since the GPS epoch on midnight Jan. 
6, 1980 - delta_time=ATL07[beam+'/sea_ice_segments/delta_time'][:] + delta_time = ATL07[beam + "/sea_ice_segments/delta_time"][:] # #Add this value to delta time parameters to compute full gps_seconds - atlas_epoch=ATL07['/ancillary_data/atlas_sdp_gps_epoch'][:] - dF_time = get_time_for_track(delta_time,atlas_epoch) - dF_time['delta_time'] = delta_time + atlas_epoch = ATL07["/ancillary_data/atlas_sdp_gps_epoch"][:] + dF_time = get_time_for_track(delta_time, atlas_epoch) + dF_time["delta_time"] = delta_time ### Primary variables of interest - vars = [ - 'across_track_distance', #Across track distance of photons averaged over the sea ice height segment. - 'height_segment_asr_calc', #Computed apparent surface reflectance for the sea ice segment. - 'height_segment_confidence',# # Height segment confidence flag - 'height_segment_fit_quality_flag', # Flag Values: ['-1', '1', '2', '3', '4', '5'] - #Flag Meanings: ['invalid', 'best', 'high', 'med', 'low', 'poor'] - 'height_segment_height', # Beam segment height - 'height_segment_length_seg', # Along track length of segment - 'height_segment_ssh_flag', #Flag for potential leads, 0=sea ice, 1 = sea surface - 'height_segment_surface_error_est', #Error estimate of the surface height - 'height_segment_type',# Flag Values: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] - # Flag Meanings: ['cloud_covered', 'other', 'specular_lead_low_w_bkg', 'specular_lead_low', 'specular_lead_high_w_bkg', 'specular_lead_high', 'dark_lead_smooth_w_bkg', 'dark_lead_smooth' - 'height_segment_w_gaussian', # Width of Gaussian fit - 'height_segment_quality', # Height quality flag, 1 for good fit, 0 for bad - ] - #vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag'] - - D_heights=dict() + "across_track_distance", # Across track distance of photons averaged over the sea ice height segment. + "height_segment_asr_calc", # Computed apparent surface reflectance for the sea ice segment. + "height_segment_confidence", # # Height segment confidence flag + "height_segment_fit_quality_flag", # Flag Values: ['-1', '1', '2', '3', '4', '5'] + # Flag Meanings: ['invalid', 'best', 'high', 'med', 'low', 'poor'] + "height_segment_height", # Beam segment height + "height_segment_length_seg", # Along track length of segment + "height_segment_ssh_flag", # Flag for potential leads, 0=sea ice, 1 = sea surface + "height_segment_surface_error_est", # Error estimate of the surface height + "height_segment_type", # Flag Values: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + # Flag Meanings: ['cloud_covered', 'other', 'specular_lead_low_w_bkg', 'specular_lead_low', 'specular_lead_high_w_bkg', 'specular_lead_high', 'dark_lead_smooth_w_bkg', 'dark_lead_smooth' + "height_segment_w_gaussian", # Width of Gaussian fit + "height_segment_quality", # Height quality flag, 1 for good fit, 0 for bad + ] + # vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag'] + + D_heights = dict() for var in vars: - D_heights[var] = ATL07[beam+'/sea_ice_segments/heights/' +var][:] + D_heights[var] = ATL07[beam + "/sea_ice_segments/heights/" + var][:] dF_heights = pd.DataFrame(D_heights) - vars_env = { - 'mss':'geophysical/height_segment_mss', # Mean sea surface height above WGS-84 reference ellipsoid (range: -105 to 87m), based on the DTU13 model. 
- 't2m':'geophysical/height_segment_t2m',#Temperature at 2m above the displacement height (K) - 'u2m':'geophysical/height_segment_u2m',#Eastward wind at 2m above the displacement height (m/s-1) - 'v2m':'geophysical/height_segment_v2m',#Northward wind at 2m above the displacement height (m/s-1) - 'n_photons_actual':'stats/n_photons_actual', # Number of photons gathered - 'photon_rate':'stats/photon_rate', #photon_rate + "mss": "geophysical/height_segment_mss", # Mean sea surface height above WGS-84 reference ellipsoid (range: -105 to 87m), based on the DTU13 model. + "t2m": "geophysical/height_segment_t2m", # Temperature at 2m above the displacement height (K) + "u2m": "geophysical/height_segment_u2m", # Eastward wind at 2m above the displacement height (m/s-1) + "v2m": "geophysical/height_segment_v2m", # Northward wind at 2m above the displacement height (m/s-1) + "n_photons_actual": "stats/n_photons_actual", # Number of photons gathered + "photon_rate": "stats/photon_rate", # photon_rate } - D_env=dict() - for var,I in vars_env.items(): - D_env[ var] = ATL07[beam+'/sea_ice_segments/' +I][:] + D_env = dict() + for var, I in vars_env.items(): + D_env[var] = ATL07[beam + "/sea_ice_segments/" + I][:] dF_env = pd.DataFrame(D_env) - - #Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) - DF = pd.concat({ 'time': dF_time, 'ref': dF_bulk, 'heights': dF_heights, 'env': dF_env }, axis=1) + # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) + DF = pd.concat( + {"time": dF_time, "ref": dF_bulk, "heights": dF_heights, "env": dF_env}, axis=1 + ) ATL07.close() # Filter out high elevation values - DF = DF[(DF['heights']['height_segment_height'] Date: Mon, 22 Jan 2024 16:36:42 -0500 Subject: [PATCH 2/6] fix: use better name for wrapper --- src/icesat2_tracks/ICEsat2_SI_tools/io.py | 33 ++++++++++--------- .../analysis_db/B01_SL_load_single_file.py | 2 +- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/io.py index f3349744..193229de 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/io.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/io.py @@ -80,7 +80,18 @@ def ID_to_str(ID_name): return IDs[0] + " " + date + " granule: " + IDs[2] -def get_gdf(ATL03_track_name, params_yapc, maximum_height): +def get_atl06p(ATL03_track_name, params_yapc, maximum_height): + """ + This method retrieves the ATL06 data from sliderule and returns a geodataframe. It also applies the corrections and removes the points above the maximum height. If the geodataframe is empty, an exception is raised. + + Parameters: + ATL03_track_name (str): The name of the ATL03 track. + params_yapc (dict): The parameters for the YAPC correction. + maximum_height (float): The maximum height to filter out. + + Returns: + geopandas.GeoDataFrame: The geodataframe containing the ATL06 data. 
+ """ print("Retrieving data from sliderule ...") gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) @@ -122,7 +133,6 @@ def __init__(self, track_name): self.hemis = "SH" else: self.hemis = self.hemis - # self.hemis = hemis self.set() self.track_name_init = track_name @@ -217,12 +227,9 @@ def nsidc_icesat2_get_associated_file( import icesat2_toolkit.utilities AUXILIARY = False - # product='ATL03' DIRECTORY = None FLATTEN = False TIMEOUT = 120 - MODE = 0o775 - # file_list = ['ATL07-01_20210301023054_10251001_005_01'] if build and not (username or password): urs = "urs.earthdata.nasa.gov" @@ -497,11 +504,7 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): mask_total = mask_seaice | mask_ocean signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1) - # print(signal_confidence.shape) - - # return signal_confidence - # Add photon rate and background rate to the reader here ATL03.close() if numpy == True: @@ -521,8 +524,7 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): "across_track_distance": across_track_distance, "ph_id_count": ph_id_count, } - ) # , - #'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) + ) dF_seg = pd.DataFrame( { @@ -781,10 +783,10 @@ def getATL07_height_corrections(fileT, beam="gt1r"): ### Primary variables of interest vars = [ - "height_segment_dac", # - "height_segment_ib", # - "height_segment_lpe", # # - "height_segment_mss", # + "height_segment_dac", + "height_segment_ib", + "height_segment_lpe", + "height_segment_mss", "height_segment_ocean", ] D_heights = dict() @@ -792,7 +794,6 @@ def getATL07_height_corrections(fileT, beam="gt1r"): D_heights[var] = ATL07[beam + "/sea_ice_segments/geophysical/" + var][:] dF_heights = pd.DataFrame(D_heights) - # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) DF = pd.concat( { "time": dF_time, diff --git a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py index 76f0fcc6..8e05ab67 100644 --- a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py +++ b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py @@ -86,7 +86,7 @@ maximum_height = 30 # (meters) maximum height past dem_h correction -gdf = io.get_gdf(ATL03_track_name, params_yapc, maximum_height) +gdf = io.get_atl06p(ATL03_track_name, params_yapc, maximum_height) cdict = dict() From 5186087aa3b05a40c0a0b034b3377e18a0fd50ca Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 15:41:19 -0500 Subject: [PATCH 3/6] refactor: io to iotools and step 1 --- .../ICEsat2_SI_tools/beam_stats.py | 210 +++++++++++------- .../ICEsat2_SI_tools/{io.py => iotools.py} | 0 .../analysis_db/B01_SL_load_single_file.py | 2 +- 3 files changed, 133 insertions(+), 79 deletions(-) rename src/icesat2_tracks/ICEsat2_SI_tools/{io.py => iotools.py} (100%) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index 7a146e8b..3f815e15 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -1,12 +1,13 @@ import numpy as np import pandas as pd import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec -import icesat2_tracks.ICEsat2_SI_tools.io as io_local +import icesat2_tracks.ICEsat2_SI_tools.iotools as io_local import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec -def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx =10): + +def 
derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): """ this method returns a dict of dataframes with the beam statistics Gd is a dict of beam tables or a hdf5 file @@ -16,18 +17,18 @@ def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx =10): """ import h5py - D=dict() + D = dict() for k in all_beams: if isinstance(Gd, dict): - Gi = Gd[k] + Gi = Gd[k] elif isinstance(Gd, h5py.File): - Gi = io_local.get_beam_hdf_store(Gd[k]) + Gi = io_local.get_beam_hdf_store(Gd[k]) else: - print('Gd is neither dict nor hdf5 file') + print("Gd is neither dict nor hdf5 file") break - dd = Gi['h_mean'] - xx = Gi['dist'] + dd = Gi["h_mean"] + xx = Gi["dist"] def get_var(sti): mask = (sti[0] < xx) & (xx <= sti[1]) @@ -39,31 +40,40 @@ def get_N(sti): def get_lat(sti): mask = (sti[0] < xx) & (xx <= sti[1]) - return np.nanmean(Gi['lats'][mask]) + return np.nanmean(Gi["lats"][mask]) - iter_x = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= False)[1,:] + iter_x = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=False + )[1, :] - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) + stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) var_list = np.array(list(map(get_var, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) - N_list = np.array(list(map(get_N, stencil_iter))) + stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) + N_list = np.array(list(map(get_N, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) + stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) lat_list = np.array(list(map(get_lat, stencil_iter))) - # make Dataframe + # make Dataframe df = pd.DataFrame() - df['x'] = iter_x - df['lat'] = lat_list - df['var'] = var_list - df['N'] = N_list * 2* dx / Lmeter - + df["x"] = iter_x + df["lat"] = lat_list + df["var"] = var_list + df["N"] = N_list * 2 * dx / Lmeter + D[k] = df return D -def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name =None): + +def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): """ Plots the beam statistics in a 2 x 2 plot D is a dict of dataframes with the beam statistics @@ -84,63 +94,99 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name =None): # make 2 x 2 plot ax1 = plt.subplot(gs[0, 0]) for k in high_beams: - plt.plot(D[k]['x']/1e3, np.sqrt(D[k]['var']), '.', color= col_dict[k], markersize=4, label=k) + plt.plot( + D[k]["x"] / 1e3, + np.sqrt(D[k]["var"]), + ".", + color=col_dict[k], + markersize=4, + label=k, + ) - plt.title('high beams std', loc='left') - plt.ylabel('segment std log(m)') - - ax1.set_yscale('log') + plt.title("high beams std", loc="left") + plt.ylabel("segment std log(m)") + + ax1.set_yscale("log") ax2 = plt.subplot(gs[1, 0]) for k in high_beams: - Di = D[k]['N'] - Di[Di ==0] =np.nan - plt.plot(D[k]['x']/1e3, D[k]['N'], '.', color= col_dict[k], markersize=4, label=k) + Di = D[k]["N"] + Di[Di == 0] = np.nan + plt.plot( + D[k]["x"] / 1e3, D[k]["N"], ".", color=col_dict[k], markersize=4, label=k + ) - plt.title('high beams N', loc='left') - plt.xlabel('along track distance (km)') - plt.ylabel('Point 
Density (m)') + plt.title("high beams N", loc="left") + plt.xlabel("along track distance (km)") + plt.ylabel("Point Density (m)") ax3 = plt.subplot(gs[0, 1]) for k in low_beams: - plt.plot(D[k]['x']/1e3, np.sqrt(D[k]['var']), '.', color= col_dict[k], markersize=4, label=k) + plt.plot( + D[k]["x"] / 1e3, + np.sqrt(D[k]["var"]), + ".", + color=col_dict[k], + markersize=4, + label=k, + ) + + plt.title("low beams std", loc="left") - plt.title('low beams std', loc='left') - - ax3.set_yscale('log') + ax3.set_yscale("log") ax4 = plt.subplot(gs[1, 1]) for k in low_beams: - Di = D[k]['N'] - Di[Di ==0] =np.nan - plt.plot(D[k]['x']/1e3, D[k]['N'], '.', color= col_dict[k], markersize=4, label=k) + Di = D[k]["N"] + Di[Di == 0] = np.nan + plt.plot( + D[k]["x"] / 1e3, D[k]["N"], ".", color=col_dict[k], markersize=4, label=k + ) - plt.title('low beams N', loc='left') - plt.xlabel('along track distance (km)') - #plt.ylabel('Point density (m)') + plt.title("low beams N", loc="left") + plt.xlabel("along track distance (km)") + # plt.ylabel('Point density (m)') ax5 = plt.subplot(gs[0:2, 2]) lat_shift = 0 for k in low_beams: Di = D[k] - plt.scatter(Di['x']/1e3, Di['lat']+lat_shift, s= np.exp(Di['N'] *5) , marker='.', color= col_dict[k], label=k, alpha = 0.3) + plt.scatter( + Di["x"] / 1e3, + Di["lat"] + lat_shift, + s=np.exp(Di["N"] * 5), + marker=".", + color=col_dict[k], + label=k, + alpha=0.3, + ) lat_shift = lat_shift + 2 for k in high_beams: Di = D[k] - plt.scatter(Di['x']/1e3, Di['lat']+lat_shift, s= np.exp(Di['N'] *5) , marker='.', color= col_dict[k], label=k, alpha = 0.3) + plt.scatter( + Di["x"] / 1e3, + Di["lat"] + lat_shift, + s=np.exp(Di["N"] * 5), + marker=".", + color=col_dict[k], + label=k, + alpha=0.3, + ) lat_shift = lat_shift + 2 - plt.title('Density in space', loc='left') - plt.ylabel('Latitude (deg)') - plt.xlabel('along track distance (km)') + plt.title("Density in space", loc="left") + plt.ylabel("Latitude (deg)") + plt.xlabel("along track distance (km)") plt.legend() plt.show() + ## plot track stats basics for sliderules ATL06 output -def plot_ATL06_track_data( G2, cdict): + +def plot_ATL06_track_data(G2, cdict): """ Plots the beam statistics in a 3 x 3 plot G2 is a GeoDataFrame from SL (ATL06) @@ -157,42 +203,50 @@ def plot_ATL06_track_data( G2, cdict): ax5 = plt.subplot(gs[1, 2]) ax6 = plt.subplot(gs[2, 2]) - for sp in G2['spot'].unique(): - Gc = G2[G2['spot'] == 1] - - Gc['h_mean_gradient'] = np.gradient(Gc['h_mean']) - ts_config = {'marker': '.', 'markersize': 0.2, 'linestyle': 'none', 'color': cdict[sp], 'alpha': 0.3} - hist_confit = {'density': True, 'color': cdict[sp], 'alpha': 0.3} - - ax1.plot(Gc.geometry.y, Gc['h_mean'], **ts_config) - ax2.plot(Gc.geometry.y, Gc['h_mean_gradient'], **ts_config) - ax3.plot(Gc.geometry.y, Gc['n_fit_photons'], **ts_config) - - Gc['h_mean'].plot.hist(ax=ax4, bins=30, **hist_confit) - Gc['h_mean_gradient'].plot.hist(ax=ax5, bins=np.linspace(-5, 5, 30), **hist_confit) - Gc['rms_misfit'].plot.hist(ax=ax6, bins=30, **hist_confit) - - ax1.set_ylabel('h_mean (m)') - ax2.set_ylabel('slope (m/m)') - ax3.set_ylabel('N Photons') - ax3.set_xlabel('Latitude (degree)') + for sp in G2["spot"].unique(): + Gc = G2[G2["spot"] == 1] + + Gc["h_mean_gradient"] = np.gradient(Gc["h_mean"]) + ts_config = { + "marker": ".", + "markersize": 0.2, + "linestyle": "none", + "color": cdict[sp], + "alpha": 0.3, + } + hist_confit = {"density": True, "color": cdict[sp], "alpha": 0.3} + + ax1.plot(Gc.geometry.y, Gc["h_mean"], **ts_config) + ax2.plot(Gc.geometry.y, 
Gc["h_mean_gradient"], **ts_config) + ax3.plot(Gc.geometry.y, Gc["n_fit_photons"], **ts_config) + + Gc["h_mean"].plot.hist(ax=ax4, bins=30, **hist_confit) + Gc["h_mean_gradient"].plot.hist( + ax=ax5, bins=np.linspace(-5, 5, 30), **hist_confit + ) + Gc["rms_misfit"].plot.hist(ax=ax6, bins=30, **hist_confit) + + ax1.set_ylabel("h_mean (m)") + ax2.set_ylabel("slope (m/m)") + ax3.set_ylabel("N Photons") + ax3.set_xlabel("Latitude (degree)") ax1.set_xticklabels([]) ax2.set_xticklabels([]) - ax1.axhline(0, color='k', linestyle='-', linewidth=0.8) - ax2.axhline(0, color='k', linestyle='-', linewidth=0.8) + ax1.axhline(0, color="k", linestyle="-", linewidth=0.8) + ax2.axhline(0, color="k", linestyle="-", linewidth=0.8) - ax1.set_title('Height', loc='left') - ax2.set_title('Slope', loc='left') - ax3.set_title('Photons per extend', loc='left') + ax1.set_title("Height", loc="left") + ax2.set_title("Slope", loc="left") + ax3.set_title("Photons per extend", loc="left") - ax4.set_title('Histograms', loc='left') - ax5.set_title('Histograms', loc='left') + ax4.set_title("Histograms", loc="left") + ax5.set_title("Histograms", loc="left") - ax6.set_title('Error Hist.', loc='left') - ax6.set_xlabel('rms_misfit (m)') + ax6.set_title("Error Hist.", loc="left") + ax6.set_xlabel("rms_misfit (m)") for axi in [ax4, ax5, ax6]: - axi.set_ylabel('') + axi.set_ylabel("") - return [ax1, ax2, ax3, ax4, ax5, ax6] \ No newline at end of file + return [ax1, ax2, ax3, ax4, ax5, ax6] diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py similarity index 100% rename from src/icesat2_tracks/ICEsat2_SI_tools/io.py rename to src/icesat2_tracks/ICEsat2_SI_tools/iotools.py diff --git a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py index 8e05ab67..c656a4f4 100644 --- a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py +++ b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py @@ -18,7 +18,7 @@ ) from icesat2_tracks.ICEsat2_SI_tools import ( sliderule_converter_tools as sct, - io, + iotools as io, beam_stats, ) from icesat2_tracks.local_modules import m_tools_ph3 as MT, m_general_ph3 as M From eca286c251e040a95c709559171afae0ed5189f3 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 15:52:31 -0500 Subject: [PATCH 4/6] fix: iotools in steps 2-5 --- .../analysis_db/A02c_IOWAGA_thredds_prior.py | 5 ++-- .../analysis_db/B02_make_spectra_gFT.py | 4 +-- .../analysis_db/B03_plot_spectra_ov.py | 25 +++++++++++-------- src/icesat2_tracks/analysis_db/B04_angle.py | 2 +- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py b/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py index 8e4fa11e..1fa8068a 100644 --- a/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py +++ b/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py @@ -10,7 +10,7 @@ from siphon.catalog import TDSCatalog from icesat2_tracks.config.IceSAT2_startup import mconfig -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.wave_tools as waves import icesat2_tracks.local_modules.m_tools_ph3 as MT import icesat2_tracks.local_modules.m_general_ph3 as M @@ -420,6 +420,7 @@ def test_nan_frac(imask): except: target_name = "A02_" + track_name + "_hindcast_fail" + def plot_prior(Prior, axx): angle = Prior["incident_angle"][ "value" @@ -531,7 +532,7 @@ def 
plot_prior(Prior, axx): ax1.axis("equal") F.save_pup(path=plot_path, name=plot_name + "_hindcast_prior") -except Exception as e: +except Exception as e: print(e) print("print 2nd figure failed") diff --git a/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py b/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py index 00da0ff5..a9cb6b90 100644 --- a/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py +++ b/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py @@ -14,7 +14,7 @@ import xarray as xr import h5py -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec import time @@ -31,7 +31,7 @@ import tracemalloc -def linear_gap_fill(F,key_lead, key_int): +def linear_gap_fill(F, key_lead, key_int): """ F pd.DataFrame key_lead key in F that determined the independent coordindate diff --git a/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py b/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py index ca7984d2..bae0de4a 100644 --- a/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py +++ b/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py @@ -6,14 +6,19 @@ import numpy as np import xarray as xr from matplotlib.gridspec import GridSpec -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.generalized_FT as gFT import icesat2_tracks.local_modules.m_tools_ph3 as MT from icesat2_tracks.local_modules import m_general_ph3 as M -from icesat2_tracks.config.IceSAT2_startup import mconfig, color_schemes, plt, font_for_print +from icesat2_tracks.config.IceSAT2_startup import ( + mconfig, + color_schemes, + plt, + font_for_print, +) track_name, batch_key, test_flag = io.init_from_input( - sys.argv # TODO: Handle via CLI + sys.argv # TODO: Handle via CLI ) # loads standard experiment hemis, batch = batch_key.split("_") @@ -21,7 +26,7 @@ load_file = load_path + "B02_" + track_name plot_path = ( mconfig["paths"]["plot"] + "/" + hemis + "/" + batch_key + "/" + track_name + "/" -) # TODO: Update with pathlib +) # TODO: Update with pathlib MT.mkdirs_r(plot_path) Gk = xr.open_dataset(load_file + "_gFT_k.nc") @@ -481,11 +486,11 @@ def plot_model_eta(D, ax, offset=0, **kargs): dd = Gk_1.gFT_PSD_data.rolling(k=10, min_periods=1, center=True).mean() plt.plot(Gk_1.k, dd, color=col_d[k], linewidth=0.8) # handle the 'All-NaN slice encountered' warning - if np.all(np.isnan(dd.data)): + if np.all(np.isnan(dd.data)): dd_max.append(np.nan) else: dd_max.append(np.nanmax(dd.data)) - + plt.xlim(klim) if lflag: plt.ylabel("$(m/m)^2/k$") @@ -495,11 +500,11 @@ def plot_model_eta(D, ax, offset=0, **kargs): ax11.axvline(k_thresh, linewidth=1, color="gray", alpha=1) ax11.axvspan(k_thresh, klim[-1], color="gray", alpha=0.5, zorder=12) - + if not np.all(np.isnan(dd_max)): - max_vale = np.nanmax(dd_max) - for ax in ax1_list: - ax.set_ylim(0,max_vale * 1.1) + max_vale = np.nanmax(dd_max) + for ax in ax1_list: + ax.set_ylim(0, max_vale * 1.1) ax0 = F.fig.add_subplot(gs[-2:, :]) diff --git a/src/icesat2_tracks/analysis_db/B04_angle.py b/src/icesat2_tracks/analysis_db/B04_angle.py index d4067845..0b1e269b 100644 --- a/src/icesat2_tracks/analysis_db/B04_angle.py +++ b/src/icesat2_tracks/analysis_db/B04_angle.py @@ -14,7 +14,7 @@ import h5py -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import xarray as xr import numpy as np From 
cf2a1d3548f0434b2d009412d9e51165a0055db7 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 16:08:25 -0500 Subject: [PATCH 5/6] refactor: move imports to the top iotools.py module --- .../ICEsat2_SI_tools/iotools.py | 61 +++++-------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py index 193229de..468fb21f 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py @@ -1,5 +1,19 @@ +import os +import re +import json +import warnings +from datetime import datetime +from netrc import netrc +from lxml import etree +from posixpath import join as posixpath_join +from pandas import HDFStore +from pandas.io.pytables import PerformanceWarning +import pandas as pd +import h5py from sliderule import icesat2 from icesat2_tracks.ICEsat2_SI_tools import sliderule_converter_tools as sct +import icesat2_toolkit.utilities +import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS def init_from_input(arguments): @@ -73,8 +87,6 @@ def init_data(ID_name, batch_key, ID_flag, ID_root, prefix="A01b_ID"): def ID_to_str(ID_name): - from datetime import datetime - IDs = ID_name.split("_") date = datetime.strptime(IDs[1], "%Y%m%d").strftime("%Y-%m-%d") return IDs[0] + " " + date + " granule: " + IDs[2] @@ -107,8 +119,6 @@ class case_ID: """docstring for case_ID""" def __init__(self, track_name): - import re - track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" track_name_rx = re.compile(track_name_pattern) @@ -219,12 +229,6 @@ def nsidc_icesat2_get_associated_file( ATL03, (or, ATL10, ATL07, not tested) """ - import netrc - import lxml - import re - import posixpath - import os - import icesat2_toolkit.utilities AUXILIARY = False DIRECTORY = None @@ -307,9 +311,6 @@ def nsidc_icesat2_get_associated_file( def json_load(name, path, verbose=False): - import json - import os - full_name = os.path.join(path, name + ".json") with open(full_name, "r") as ifile: @@ -329,7 +330,6 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name): sd '2019.03.01'- subdirectory on ATLAS file_name 'ATL03_20190301010737_09560204_005_01.h5' - filename in subdirectory """ - import icesat2_toolkit.utilities HOST = ["https://n5eil01u.ecs.nsidc.org", "ATLAS", product_directory, sd, file_name] print("download to:", dpath + "/" + HOST[-1]) @@ -346,15 +346,9 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name): def save_pandas_table(table_dict, name, save_path): - import os - if not os.path.exists(save_path): os.makedirs(save_path) - import warnings - from pandas import HDFStore - from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings("ignore", category=PerformanceWarning) with HDFStore(save_path + "/" + name + ".h5") as store: @@ -363,10 +357,6 @@ def save_pandas_table(table_dict, name, save_path): def load_pandas_table_dict(name, save_path): - import warnings - from pandas import HDFStore - from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings("ignore", category=PerformanceWarning) return_dict = dict() @@ -378,8 +368,6 @@ def load_pandas_table_dict(name, save_path): def get_beam_hdf_store(ATL03_k): - import pandas as pd - DD = pd.DataFrame() # columns = ATL03.keys()) for ikey in ATL03_k.keys(): DD[ikey] = ATL03_k[ikey] @@ -388,17 +376,12 @@ def get_beam_hdf_store(ATL03_k): def 
get_beam_var_hdf_store(ATL03_k, ikey): - import pandas as pd - DD = pd.DataFrame() # columns = ATL03.keys()) DD[ikey] = ATL03_k[ikey] return DD def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): - import os - import h5py - mode = "w" if mode is None else mode if not os.path.exists(path): os.makedirs(path) @@ -420,8 +403,6 @@ def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): def get_time_for_track(delta_time, atlas_epoch): "returns pandas dataframe" - import pandas as pd - import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS # Conversion of delta_time to a calendar date temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) @@ -446,8 +427,6 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): beam key of the iceSAT2 beam. """ # Add in a proper description of the function here - import h5py - import pandas as pd # Open the file ATL03 = h5py.File(fileT, "r") @@ -556,9 +535,6 @@ def getATL03_height_correction(fileT, beam="gt1r"): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL03 = h5py.File(fileT, "r") @@ -585,9 +561,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") @@ -669,9 +642,6 @@ def getATL10_beam(fileT, beam="gt1r", maxElev=1e6): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") @@ -756,9 +726,6 @@ def getATL07_height_corrections(fileT, beam="gt1r"): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") From 24c49962f1f99e015419a42af4f9ed60875bcf8e Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 17:20:56 -0500 Subject: [PATCH 6/6] refactor: iotools.py for better readability and efficiency - Refactor getATL03_beam: Simplified the logic of the function to improve readability and performance - Update case_ID class: Add comments to clarify the purpose and functionality of the class. Also, simplified some of its logic for better performance and readability - Use pathlib where appropriate --- .../ICEsat2_SI_tools/iotools.py | 102 +++++++++--------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py index 468fb21f..e92ee495 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py @@ -1,6 +1,7 @@ import os import re import json +from pathlib import Path import warnings from datetime import datetime from netrc import netrc @@ -108,7 +109,7 @@ def get_atl06p(ATL03_track_name, params_yapc, maximum_height): gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) if gdf.empty: - raise Exception("Empty Geodataframe. No data could be retrieved.") + raise ValueError("Empty Geodataframe. No data could be retrieved.") print("Initial data retrieved") gdf = sct.correct_and_remove_height(gdf, maximum_height) @@ -121,28 +122,30 @@ class case_ID: def __init__(self, track_name): track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" 
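# --- Editor's aside (illustration only, not part of the patch): a minimal,
# runnable sketch of how track_name_pattern decomposes a hemisphere-prefixed
# track ID into the twelve capture groups unpacked below. The sample name
# "NH_20190301_09560205" is hypothetical; unmatched optional groups (time,
# release, version) come back as empty strings from re.findall.
import re

_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?"
hemis, YY, MM, DD, HH, MN, SS, TRK, CYC, GRN, RL, VRS = (
    re.compile(_pattern).findall("NH_20190301_09560205").pop()
)
assert (hemis, YY, MM, DD) == ("NH", "2019", "03", "01")  # hemisphere + date block
assert (TRK, CYC, GRN) == ("0956", "02", "05")  # track, cycle, granule block
assert (HH, MN, SS, RL, VRS) == ("", "", "", "", "")  # optional groups stay empty
# --- end editor's aside ---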
From 24c49962f1f99e015419a42af4f9ed60875bcf8e Mon Sep 17 00:00:00 2001
From: Carlos Paniagua
Date: Tue, 30 Jan 2024 17:20:56 -0500
Subject: [PATCH 6/6] refactor: iotools.py for better readability and efficiency

- Refactor getATL03_beam: simplify the function's logic to improve
  readability and performance
- Update case_ID class: add comments clarifying the class's purpose and
  functionality, and simplify some of its logic for better performance
  and readability
- Use pathlib where appropriate
---
 .../ICEsat2_SI_tools/iotools.py | 102 +++++++++---------
 1 file changed, 48 insertions(+), 54 deletions(-)

diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py
index 468fb21f..e92ee495 100644
--- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py
+++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py
@@ -1,6 +1,7 @@
 import os
 import re
 import json
+from pathlib import Path
 import warnings
 from datetime import datetime
 from netrc import netrc
@@ -108,7 +109,7 @@ def get_atl06p(ATL03_track_name, params_yapc, maximum_height):
     gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name])
 
     if gdf.empty:
-        raise Exception("Empty Geodataframe. No data could be retrieved.")
+        raise ValueError("Empty Geodataframe. No data could be retrieved.")
 
     print("Initial data retrieved")
     gdf = sct.correct_and_remove_height(gdf, maximum_height)
@@ -121,28 +122,30 @@ class case_ID:
 
     def __init__(self, track_name):
         track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?"
+        # Compile the regular expression pattern for track names
         track_name_rx = re.compile(track_name_pattern)
+
+        # Use the compiled regular expression to find all matches in the track name
+        # The pop() method is used to get the last (or only) match
+        # The result is a tuple, which is unpacked into several properties of the current object
         (
-            self.hemis,
-            self.YY,
-            self.MM,
-            self.DD,
-            self.HH,
-            self.MN,
-            self.SS,
-            self.TRK,
-            self.CYC,
-            self.GRN,
-            self.RL,
-            self.VRS,
+            self.hemis,  # Hemisphere
+            self.YY,  # Year
+            self.MM,  # Month
+            self.DD,  # Day
+            self.HH,  # Hour
+            self.MN,  # Minute
+            self.SS,  # Second
+            self.TRK,  # Track
+            self.CYC,  # Cycle
+            self.GRN,  # Granule
+            self.RL,  # Release
+            self.VRS,  # Version
         ) = track_name_rx.findall(track_name).pop()
 
-        if self.hemis == "01":
-            self.hemis = "NH"
-        elif self.hemis == "02":
-            self.hemis = "SH"
-        else:
-            self.hemis = self.hemis
+        hemis_map = {"01": "NH", "02": "SH"}
+        self.hemis = hemis_map.get(self.hemis, self.hemis)
+
         self.set()
         self.track_name_init = track_name
@@ -346,12 +349,12 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name):
 
 
 def save_pandas_table(table_dict, name, save_path):
-    if not os.path.exists(save_path):
-        os.makedirs(save_path)
+    save_path = Path(save_path)
+    save_path.mkdir(parents=True, exist_ok=True)
 
     warnings.filterwarnings("ignore", category=PerformanceWarning)
 
-    with HDFStore(save_path + "/" + name + ".h5") as store:
+    with HDFStore(save_path / f"{name}.h5") as store:
         for name, table in table_dict.items():
             store[name] = table
@@ -382,23 +385,18 @@ def get_beam_var_hdf_store(ATL03_k, ikey):
 
 def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"):
-    mode = "w" if mode is None else mode
-    if not os.path.exists(path):
-        os.makedirs(path)
+    path = Path(path)
+    path.mkdir(parents=True, exist_ok=True)
 
-    full_name = os.path.join(path, name + ".h5")
-    store = h5py.File(full_name, mode)
-
-    for k in data_dict.keys():
-        store1 = store.create_group(k)
-        for kk, I in list(data_dict[k].items()):
-            store1[kk] = I
-        # store1.close()
-
-    store.close()
+    full_name = path / (name + ".h5")
+    with h5py.File(str(full_name), mode) as store:
+        for k in data_dict.keys():
+            store1 = store.create_group(k)
+            for kk, I in list(data_dict[k].items()):
+                store1[kk] = I
 
     if verbose:
-        print("saved at: " + full_name)
+        print(f"saved at: {full_name}")
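Because Path(save_path) normalizes its argument, save_pandas_table and write_track_to_HDF5 now accept either strings or Path objects with no caller changes. A hedged round-trip sketch (paths and table keys are illustrative; load_pandas_table_dict is assumed to read back what save_pandas_table wrote):

    import pandas as pd

    from icesat2_tracks.ICEsat2_SI_tools import iotools as io

    tables = {"gt1l": pd.DataFrame({"heights": [1.0, 2.0, 3.0]})}
    io.save_pandas_table(tables, "B01_demo", "/tmp/tables")  # writes /tmp/tables/B01_demo.h5
    loaded = io.load_pandas_table_dict("B01_demo", "/tmp/tables")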
@@ -452,7 +450,6 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6):
 
     # Photon height
     heights = ATL03[beam + "/heights/h_ph"][:]
-    # print(heights.shape)
 
     # Flag for signal confidence
     # column index: 0=Land; 1=Ocean; 2=SeaIce; 3=LandIce; 4=InlandWater
@@ -464,25 +461,25 @@
     # -- 3: medium
     # -- 4: high
 
-    mask_ocean = (
-        ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 2
-    )  # ocean points medium or high quality
-    mask_seaice = (
-        ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 2
-    )  # sea ice points medium or high quality
+    heights_signal_conf_ph = "/heights/signal_conf_ph"
+    quality_threshold = 2
+    beam_data = ATL03[beam + heights_signal_conf_ph]
+
+    # ocean points medium or high quality
+    mask_ocean = beam_data[:, 1] > quality_threshold
+    # sea ice points medium or high quality
+    mask_seaice = beam_data[:, 2] > quality_threshold
     mask_total = mask_seaice | mask_ocean
 
-    if sum(~mask_total) == (ATL03[beam + "/heights/signal_conf_ph"][:, 1]).size:
+    if sum(~mask_total) == beam_data[:, 1].size:
         print("zero photons, lower photon quality to 2 or higher")
-        mask_ocean = (
-            ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 1
-        )  # ocean points medium or high quality
-        mask_seaice = (
-            ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 1
-        )  # sea ice points medium or high quality
+        # lower quality threshold and recompute
+        quality_threshold = 1
+        mask_ocean = beam_data[:, 1] > quality_threshold
+        mask_seaice = beam_data[:, 2] > quality_threshold
         mask_total = mask_seaice | mask_ocean
 
-    signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1)
+    signal_confidence = ATL03[beam + heights_signal_conf_ph][:, 1:3].max(1)
 
     ATL03.close()
 
@@ -600,7 +597,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6):
         "height_segment_w_gaussian",  # Width of Gaussian fit
         "height_segment_quality",  # Height quality flag, 1 for good fit, 0 for bad
     ]
-    # vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag']
 
     D_heights = dict()
     for var in vars:
@@ -621,7 +617,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6):
         D_env[var] = ATL07[beam + "/sea_ice_segments/" + I][:]
 
     dF_env = pd.DataFrame(D_env)
-    # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0)
 
     DF = pd.concat(
         {"time": dF_time, "ref": dF_bulk, "heights": dF_heights, "env": dF_env}, axis=1
     )
@@ -646,8 +641,6 @@ def getATL10_beam(fileT, beam="gt1r", maxElev=1e6):
     ATL07 = h5py.File(fileT, "r")
 
     ### bulk positions and statistics
-    # f['gt1r/freeboard_beam_segment/beam_freeboard'].keys()
-
     vars_bulk = [
         "seg_dist_x",