From 6aac4408a3df8b9151539b9bad5a9428a627e8fc Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Mon, 22 Jan 2024 16:14:53 -0500 Subject: [PATCH 1/6] fix: isolate data pull --- src/icesat2_tracks/ICEsat2_SI_tools/io.py | 795 ++++++++++-------- .../analysis_db/B01_SL_load_single_file.py | 39 +- 2 files changed, 468 insertions(+), 366 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/io.py index 521e305c..f3349744 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/io.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/io.py @@ -1,41 +1,50 @@ +from sliderule import icesat2 +from icesat2_tracks.ICEsat2_SI_tools import sliderule_converter_tools as sct def init_from_input(arguments): - if (len(arguments) <= 1) | ('-f' in set(arguments) ) : + """ + Initializes the variables track_name, batch_key, and test_flag based on the input arguments. + + Parameters: + arguments (list): A list of input arguments. - track_name='20190605061807_10380310_004_01' - batch_key='SH_batch01' + Returns: + tuple: A tuple containing the values of track_name, batch_key, and test_flag. + """ + + if (len(arguments) <= 1) | ("-f" in set(arguments)): + track_name = "20190605061807_10380310_004_01" + batch_key = "SH_batch01" test_flag = True - print('use standard values') + print("use standard values") else: + track_name = arguments[1] + batch_key = arguments[2] + # $(hemisphere) $(coords) $(config) - track_name=arguments[1] - batch_key =arguments[2] - #$(hemisphere) $(coords) $(config) - - print('read vars from file: ' +str(arguments[1]) ) + print("read vars from file: " + str(arguments[1])) - if (len(arguments) >= 4): - if arguments[3] == 'True': + if len(arguments) >= 4: + if arguments[3] == "True": test_flag = True - elif arguments[3] == 'False': + elif arguments[3] == "False": test_flag = False else: - test_flag= arguments[3] + test_flag = arguments[3] - print('test_flag found, test_flag= '+str(test_flag) ) + print("test_flag found, test_flag= " + str(test_flag)) else: - test_flag=False + test_flag = False print(track_name) - - print('----- batch ='+ batch_key) - print('----- test_flag: ' + str(test_flag)) + print("----- batch =" + batch_key) + print("----- test_flag: " + str(test_flag)) return track_name, batch_key, test_flag -def init_data(ID_name, batch_key, ID_flag, ID_root, prefix ='A01b_ID'): +def init_data(ID_name, batch_key, ID_flag, ID_root, prefix="A01b_ID"): """ Takes inputs and retrieves the ID, track_names that can be loaded, hemis, batch inputs: are the outputs from init_from_input, specifically @@ -49,103 +58,146 @@ def init_data(ID_name, batch_key, ID_flag, ID_root, prefix ='A01b_ID'): """ print(ID_name, batch_key, ID_flag) - hemis, batch = batch_key.split('_') + hemis, batch = batch_key.split("_") if ID_flag: - ID_path = ID_root +'/'+batch_key+'/'+prefix+'/' - ID = json_load( prefix +'_'+ID_name, ID_path ) - track_names = ID['tracks']['ATL03'] + ID_path = ID_root + "/" + batch_key + "/" + prefix + "/" + ID = json_load(prefix + "_" + ID_name, ID_path) + track_names = ID["tracks"]["ATL03"] else: - track_names = ['ATL03_'+ID_name] - ID = ID_name + track_names = ["ATL03_" + ID_name] + ID = ID_name return ID, track_names, hemis, batch + def ID_to_str(ID_name): from datetime import datetime - IDs = ID_name.split('_') - date = datetime.strptime(IDs[1], '%Y%m%d').strftime('%Y-%m-%d')#.strftime('%m/%d/%Y') - date - return IDs[0] +' ' +date +' granule: ' + IDs[2] + IDs = ID_name.split("_") + date = datetime.strptime(IDs[1], "%Y%m%d").strftime("%Y-%m-%d") + return 
IDs[0] + " " + date + " granule: " + IDs[2] + + +def get_gdf(ATL03_track_name, params_yapc, maximum_height): + print("Retrieving data from sliderule ...") + gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) + + if gdf.empty: + raise Exception("Empty Geodataframe. No data could be retrieved.") + + print("Initial data retrieved") + gdf = sct.correct_and_remove_height(gdf, maximum_height) + return gdf + class case_ID: """docstring for case_ID""" + def __init__(self, track_name): import re - track_name_pattern = r'(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?' - case_ID_pattern = r'(\d{4})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})' - track_name_rx = re.compile(track_name_pattern) - self.hemis,self.YY,self.MM,self.DD,self.HH,self.MN,self.SS,self.TRK,self.CYC,self.GRN,self.RL,self.VRS = track_name_rx.findall(track_name).pop() + track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" - if self.hemis == '01': - self.hemis = 'NH' - elif self.hemis == '02': - self.hemis = 'SH' + track_name_rx = re.compile(track_name_pattern) + ( + self.hemis, + self.YY, + self.MM, + self.DD, + self.HH, + self.MN, + self.SS, + self.TRK, + self.CYC, + self.GRN, + self.RL, + self.VRS, + ) = track_name_rx.findall(track_name).pop() + + if self.hemis == "01": + self.hemis = "NH" + elif self.hemis == "02": + self.hemis = "SH" else: self.hemis = self.hemis - #self.hemis = hemis + # self.hemis = hemis self.set() self.track_name_init = track_name def set(self): - block1 = (self.YY,self.MM,self.DD) - block2 = (self.TRK,self.CYC,self.GRN) + block1 = (self.YY, self.MM, self.DD) + block2 = (self.TRK, self.CYC, self.GRN) - self.ID = self.hemis+'_'+''.join(block1) +'_'+ ''.join(block2) + self.ID = self.hemis + "_" + "".join(block1) + "_" + "".join(block2) return self.ID def get_granule(self): - return ''.join((self.TRK,self.CYC,self.GRN)) + return "".join((self.TRK, self.CYC, self.GRN)) def set_dummy(self): - block1 = (self.YY,self.MM,self.DD) - block2 = (self.TRK,self.CYC,self.GRN) + block1 = (self.YY, self.MM, self.DD) + block2 = (self.TRK, self.CYC, self.GRN) - self.ID_dummy = ''.join(block1) +'_'+ ''.join(block2) + self.ID_dummy = "".join(block1) + "_" + "".join(block2) return self.ID_dummy def set_ATL03_trackname(self): - - block1 = (self.YY,self.MM,self.DD) - block1b = (self.HH,self.MN,self.SS) - block2 = (self.TRK,self.CYC,self.GRN) - if self.RL is '': + block1 = (self.YY, self.MM, self.DD) + block1b = (self.HH, self.MN, self.SS) + block2 = (self.TRK, self.CYC, self.GRN) + if self.RL is "": raise ValueError("RL not set") - if self.VRS is '': + if self.VRS is "": raise ValueError("VRS not set") - block3 = (self.RL,self.VRS) + block3 = (self.RL, self.VRS) - self.ID_ATL03 = ''.join(block1) +''.join(block1b) +'_'+ ''.join(block2) +'_'+ '_'.join(block3) + self.ID_ATL03 = ( + "".join(block1) + + "".join(block1b) + + "_" + + "".join(block2) + + "_" + + "_".join(block3) + ) return self.ID_ATL03 def set_ATL10_trackname(self): - - block1 = (self.YY,self.MM,self.DD) - block1b = (self.HH,self.MN,self.SS) - block2 = (self.TRK,self.CYC, '01') # granule is alwasy '01' for ATL10 - if self.RL is '': + block1 = (self.YY, self.MM, self.DD) + block1b = (self.HH, self.MN, self.SS) + block2 = (self.TRK, self.CYC, "01") # granule is alwasy '01' for ATL10 + if self.RL is "": raise ValueError("RL not set") - if self.VRS is '': + if self.VRS is "": raise ValueError("VRS not set") - block3 = (self.RL,self.VRS) + 
block3 = (self.RL, self.VRS) - if self.hemis == 'NH': - hemis = '01' - elif self.hemis == 'SH': - hemis = '02' + if self.hemis == "NH": + hemis = "01" + elif self.hemis == "SH": + hemis = "02" else: hemis = self.hemis - self.ID_ATL10 = hemis+'_'+''.join(block1) +''.join(block1b) +'_'+ ''.join(block2) +'_'+ '_'.join(block3) + self.ID_ATL10 = ( + hemis + + "_" + + "".join(block1) + + "".join(block1b) + + "_" + + "".join(block2) + + "_" + + "_".join(block3) + ) return self.ID_ATL10 -def nsidc_icesat2_get_associated_file(file_list, product, build=True, username=None, password=None): +def nsidc_icesat2_get_associated_file( + file_list, product, build=True, username=None, password=None +): """ THis method returns assocociated files names and paths for files given in file_list for the "product" ICEsat2 product @@ -163,105 +215,104 @@ def nsidc_icesat2_get_associated_file(file_list, product, build=True, username=N import posixpath import os import icesat2_toolkit.utilities - AUXILIARY=False - #product='ATL03' - DIRECTORY= None - FLATTEN=False - TIMEOUT=120 - MODE=0o775 - #file_list = ['ATL07-01_20210301023054_10251001_005_01'] + + AUXILIARY = False + # product='ATL03' + DIRECTORY = None + FLATTEN = False + TIMEOUT = 120 + MODE = 0o775 + # file_list = ['ATL07-01_20210301023054_10251001_005_01'] if build and not (username or password): - urs = 'urs.earthdata.nasa.gov' - username,login,password = netrc.netrc().authenticators(urs) - #-- build urllib2 opener and check credentials + urs = "urs.earthdata.nasa.gov" + username, login, password = netrc.netrc().authenticators(urs) + # -- build urllib2 opener and check credentials if build: - #-- build urllib2 opener with credentials + # -- build urllib2 opener with credentials icesat2_toolkit.utilities.build_opener(username, password) - #-- check credentials + # -- check credentials icesat2_toolkit.utilities.check_credentials() parser = lxml.etree.HTMLParser() - #-- remote https server for ICESat-2 Data - HOST = 'https://n5eil01u.ecs.nsidc.org' - #-- regular expression operator for extracting information from files - rx = re.compile(r'(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})' - r'(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})') - #-- regular expression pattern for finding specific files - regex_suffix = '(.*?)$' if AUXILIARY else '(h5)$' - remote_regex_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})' - r'(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}') - - # rx = re.compile(r'(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})' - # r'(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$') - # #-- regular expression pattern for finding specific files - # regex_suffix = '(.*?)$' if AUXILIARY else '(h5)$' - # remote_regex_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})' - # r'(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}') - - #-- build list of remote files, remote modification times and local files + # -- remote https server for ICESat-2 Data + HOST = "https://n5eil01u.ecs.nsidc.org" + # -- regular expression operator for extracting information from files + rx = re.compile( + r"(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})" + r"(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})" + ) + # -- regular expression pattern for finding specific files + regex_suffix = "(.*?)$" if AUXILIARY else "(h5)$" + remote_regex_pattern = ( + r"{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})" + 
r"(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}" + ) + + # -- build list of remote files, remote modification times and local files original_files = [] remote_files = [] remote_mtimes = [] local_files = [] - remote_names =[] + remote_names = [] for input_file in file_list: - #print(input_file) - #-- extract parameters from ICESat-2 ATLAS HDF5 file name - SUB,PRD,HEM,YY,MM,DD,HH,MN,SS,TRK,CYC,GRN,RL,VRS = \ - rx.findall(input_file).pop() - #-- get directories from remote directory - product_directory = '{0}.{1}'.format(product,RL) - sd = '{0}.{1}.{2}'.format(YY,MM,DD) - PATH = [HOST,'ATLAS',product_directory,sd] - #-- local and remote data directories - remote_dir=posixpath.join(*PATH) - temp=os.path.dirname(input_file) if (DIRECTORY is None) else DIRECTORY - local_dir=os.path.expanduser(temp) if FLATTEN else os.path.join(temp,sd) - #-- create output directory if not currently existing + # print(input_file) + # -- extract parameters from ICESat-2 ATLAS HDF5 file name + SUB, PRD, HEM, YY, MM, DD, HH, MN, SS, TRK, CYC, GRN, RL, VRS = rx.findall( + input_file + ).pop() + # -- get directories from remote directory + product_directory = "{0}.{1}".format(product, RL) + sd = "{0}.{1}.{2}".format(YY, MM, DD) + PATH = [HOST, "ATLAS", product_directory, sd] + # -- local and remote data directories + remote_dir = posixpath.join(*PATH) + temp = os.path.dirname(input_file) if (DIRECTORY is None) else DIRECTORY + local_dir = os.path.expanduser(temp) if FLATTEN else os.path.join(temp, sd) + # -- create output directory if not currently existing # if not os.access(local_dir, os.F_OK): # os.makedirs(local_dir, MODE) - #-- compile regular expression operator for file parameters - args = (product,TRK,CYC,GRN,RL,regex_suffix) + # -- compile regular expression operator for file parameters + args = (product, TRK, CYC, GRN, RL, regex_suffix) R1 = re.compile(remote_regex_pattern.format(*args), re.VERBOSE) - #-- find associated ICESat-2 data file - #-- find matching files (for granule, release, version, track) - colnames,collastmod,colerror=icesat2_toolkit.utilities.nsidc_list(PATH, - build=False, - timeout=TIMEOUT, - parser=parser, - pattern=R1, - sort=True) + # -- find associated ICESat-2 data file + # -- find matching files (for granule, release, version, track) + colnames, collastmod, colerror = icesat2_toolkit.utilities.nsidc_list( + PATH, build=False, timeout=TIMEOUT, parser=parser, pattern=R1, sort=True + ) print(colnames) - #-- print if file was not found + # -- print if file was not found if not colnames: print(colerror) continue - #-- add to lists - for colname,remote_mtime in zip(colnames,collastmod): - #-- save original file to list (expands if getting auxiliary files) + # -- add to lists + for colname, remote_mtime in zip(colnames, collastmod): + # -- save original file to list (expands if getting auxiliary files) original_files.append(input_file) - #-- remote and local versions of the file - remote_files.append(posixpath.join(remote_dir,colname)) - local_files.append(os.path.join(local_dir,colname)) + # -- remote and local versions of the file + remote_files.append(posixpath.join(remote_dir, colname)) + local_files.append(os.path.join(local_dir, colname)) remote_mtimes.append(remote_mtime) remote_names.append(colname) - return original_files, remote_files, remote_names #product_directory, sd, + return original_files, remote_files, remote_names # product_directory, sd, + def json_load(name, path, verbose=False): import json import os - full_name= (os.path.join(path,name+ 
'.json')) - with open(full_name, 'r') as ifile: - data=json.load(ifile) + full_name = os.path.join(path, name + ".json") + + with open(full_name, "r") as ifile: + data = json.load(ifile) if verbose: - print('loaded from: ',full_name) + print("loaded from: ", full_name) return data -def ATL03_download(username,password, dpath, product_directory, sd, file_name): + +def ATL03_download(username, password, dpath, product_directory, sd, file_name): """ inputs: username: username for https://urs.earthdata.nasa.gov @@ -272,102 +323,114 @@ def ATL03_download(username,password, dpath, product_directory, sd, file_name): file_name 'ATL03_20190301010737_09560204_005_01.h5' - filename in subdirectory """ import icesat2_toolkit.utilities - from icesat2_toolkit.read_ICESat2_ATL03 import read_HDF5_ATL03 - HOST = ['https://n5eil01u.ecs.nsidc.org','ATLAS',product_directory,sd, file_name] - # HOST = ['https://n5eil01u.ecs.nsidc.org','ATLAS','ATL03.003','2018.10.14', - # 'ATL03_20181014000347_02350101_003_01.h5'] - print('download to:', dpath+'/'+HOST[-1]) - buffer,error=icesat2_toolkit.utilities.from_nsidc(HOST,username=username, - password=password,local=dpath+'/'+HOST[-1],verbose=True) - #-- raise exception if download error + + HOST = ["https://n5eil01u.ecs.nsidc.org", "ATLAS", product_directory, sd, file_name] + print("download to:", dpath + "/" + HOST[-1]) + buffer, error = icesat2_toolkit.utilities.from_nsidc( + HOST, + username=username, + password=password, + local=dpath + "/" + HOST[-1], + verbose=True, + ) + # -- raise exception if download error if not buffer: raise Exception(error) -def save_pandas_table(table_dict, name , save_path): + +def save_pandas_table(table_dict, name, save_path): import os + if not os.path.exists(save_path): os.makedirs(save_path) import warnings from pandas import HDFStore from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings('ignore',category=PerformanceWarning) - with HDFStore(save_path+'/'+name+'.h5') as store: - for name,table in table_dict.items(): - store[name]=table + warnings.filterwarnings("ignore", category=PerformanceWarning) + + with HDFStore(save_path + "/" + name + ".h5") as store: + for name, table in table_dict.items(): + store[name] = table + -def load_pandas_table_dict(name , save_path): +def load_pandas_table_dict(name, save_path): import warnings from pandas import HDFStore from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings('ignore',category=PerformanceWarning) - return_dict=dict() - with HDFStore(save_path+'/'+name+'.h5') as store: - #print(store) - #print(store.keys()) + warnings.filterwarnings("ignore", category=PerformanceWarning) + + return_dict = dict() + with HDFStore(save_path + "/" + name + ".h5") as store: for k in store.keys(): - return_dict[k[1:]]=store.get(k) + return_dict[k[1:]] = store.get(k) return return_dict -def get_beam_hdf_store(ATL03_k): +def get_beam_hdf_store(ATL03_k): import pandas as pd - DD = pd.DataFrame()#columns = ATL03.keys()) + + DD = pd.DataFrame() # columns = ATL03.keys()) for ikey in ATL03_k.keys(): DD[ikey] = ATL03_k[ikey] return DD -def get_beam_var_hdf_store(ATL03_k, ikey): +def get_beam_var_hdf_store(ATL03_k, ikey): import pandas as pd - DD = pd.DataFrame()#columns = ATL03.keys()) + + DD = pd.DataFrame() # columns = ATL03.keys()) DD[ikey] = ATL03_k[ikey] return DD -def write_track_to_HDF5(data_dict, name, path, verbose=False, mode='w'): +def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): import os import h5py - mode = 'w' if mode is None else 
mode + + mode = "w" if mode is None else mode if not os.path.exists(path): os.makedirs(path) - full_name= (os.path.join(path,name+ '.h5')) + full_name = os.path.join(path, name + ".h5") store = h5py.File(full_name, mode) for k in data_dict.keys(): store1 = store.create_group(k) for kk, I in list(data_dict[k].items()): - store1[kk]=I - #store1.close() + store1[kk] = I + # store1.close() store.close() if verbose: - print('saved at: ' +full_name) + print("saved at: " + full_name) def get_time_for_track(delta_time, atlas_epoch): "returns pandas dataframe" import pandas as pd - import convert_GPS_time as cGPS + import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS + # Conversion of delta_time to a calendar date temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) - year = temp['year'][:].astype('int') - month = temp['month'][:].astype('int') - day = temp['day'][:].astype('int') - hour = temp['hour'][:].astype('int') - minute = temp['minute'][:].astype('int') - second = temp['second'][:].astype('int') + year = temp["year"][:].astype("int") + month = temp["month"][:].astype("int") + day = temp["day"][:].astype("int") + hour = temp["hour"][:].astype("int") + second = temp["second"][:].astype("int") + + return pd.DataFrame( + {"year": year, "month": month, "day": day, "hour": hour, "second": second} + ) - return pd.DataFrame({'year':year, 'month':month, 'day':day, 'hour':hour, 'second':second}) -def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): +def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): """ returns 'beam' from fileT as pandas table. fillT path of file @@ -377,78 +440,66 @@ def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): """ # Add in a proper description of the function here import h5py - import convert_GPS_time as cGPS import pandas as pd + # Open the file - ATL03 = h5py.File(fileT, 'r') - lons = ATL03[beam+'/heights/lon_ph'][:] - lats = ATL03[beam+'/heights/lat_ph'][:] + ATL03 = h5py.File(fileT, "r") + lons = ATL03[beam + "/heights/lon_ph"][:] + lats = ATL03[beam + "/heights/lat_ph"][:] # Along track distance from equator i think. - along_track_distance=ATL03[beam+'/heights/dist_ph_along'][:] - across_track_distance=ATL03[beam+'/heights/dist_ph_across'][:] - #dem_h = ATL03[beam+'/geophys_corr/dem_h'][:] - #delta_time_dem_h = ATL03[beam+'/geophys_corr/delta_time'][:] - segment_dist_x=ATL03[beam+'/geolocation/segment_dist_x'][:] - segment_length=ATL03[beam+'/geolocation/segment_length'][:] - segment_id = ATL03[beam+'/geolocation/segment_id'][:] + along_track_distance = ATL03[beam + "/heights/dist_ph_along"][:] + across_track_distance = ATL03[beam + "/heights/dist_ph_across"][:] + segment_dist_x = ATL03[beam + "/geolocation/segment_dist_x"][:] + segment_length = ATL03[beam + "/geolocation/segment_length"][:] + segment_id = ATL03[beam + "/geolocation/segment_id"][:] - delta_time_geolocation = ATL03[beam+'/geolocation/delta_time'][:] - reference_photon_index= ATL03[beam+'/geolocation/reference_photon_index'][:] - ph_index_beg= ATL03[beam+'/geolocation/ph_index_beg'][:] + delta_time_geolocation = ATL03[beam + "/geolocation/delta_time"][:] + reference_photon_index = ATL03[beam + "/geolocation/reference_photon_index"][:] + ph_index_beg = ATL03[beam + "/geolocation/ph_index_beg"][:] - - ph_id_count = ATL03[beam+'/heights/ph_id_count'][:] + ph_id_count = ATL03[beam + "/heights/ph_id_count"][:] # Nathan says it's the number of seconds since the GPS epoch on midnight Jan. 
6, 1980 - delta_time = ATL03[beam+'/heights/delta_time'][:] - #podppd_flag=ATL03[beam+'/geolocation/podppd_flag'][:] - - # #Add this value to delta time parameters to compute full gps_seconds - atlas_epoch = ATL03['/ancillary_data/atlas_sdp_gps_epoch'][:] - - # Conversion of delta_time to a calendar date - temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) - - # Express delta_time relative to start time of granule - #delta_time_granule=delta_time-delta_time[0] - - # year = temp['year'][:].astype('int') - # month = temp['month'][:].astype('int') - # day = temp['day'][:].astype('int') - # hour = temp['hour'][:].astype('int') - # minute = temp['minute'][:].astype('int') - # second = temp['second'][:].astype('int') + delta_time = ATL03[beam + "/heights/delta_time"][:] # Primary variables of interest # Photon height - heights=ATL03[beam+'/heights/h_ph'][:] - #print(heights.shape) + heights = ATL03[beam + "/heights/h_ph"][:] + # print(heights.shape) # Flag for signal confidence # column index: 0=Land; 1=Ocean; 2=SeaIce; 3=LandIce; 4=InlandWater # values: - #-- -1: Events not associated with a specific surface type - #-- 0: noise - #-- 1: buffer but algorithm classifies as background - #-- 2: low - #-- 3: medium - #-- 4: high - - mask_ocean = ATL03[beam+'/heights/signal_conf_ph'][:, 1] > 2 # ocean points medium or high quality - mask_seaice = ATL03[beam+'/heights/signal_conf_ph'][:, 2] > 2 # sea ice points medium or high quality - mask_total = (mask_seaice | mask_ocean) - - if sum(~mask_total) == (ATL03[beam+'/heights/signal_conf_ph'][:, 1]).size: - print('zero photons, lower photon quality to 2 or higher') - mask_ocean = ATL03[beam+'/heights/signal_conf_ph'][:, 1] > 1 # ocean points medium or high quality - mask_seaice = ATL03[beam+'/heights/signal_conf_ph'][:, 2] > 1 # sea ice points medium or high quality - mask_total = (mask_seaice | mask_ocean) - - signal_confidence = ATL03[beam+'/heights/signal_conf_ph'][:, 1:3].max(1) - #print(signal_confidence.shape) - - #return signal_confidence + # -- -1: Events not associated with a specific surface type + # -- 0: noise + # -- 1: buffer but algorithm classifies as background + # -- 2: low + # -- 3: medium + # -- 4: high + + mask_ocean = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 2 + ) # ocean points medium or high quality + mask_seaice = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 2 + ) # sea ice points medium or high quality + mask_total = mask_seaice | mask_ocean + + if sum(~mask_total) == (ATL03[beam + "/heights/signal_conf_ph"][:, 1]).size: + print("zero photons, lower photon quality to 2 or higher") + mask_ocean = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 1 + ) # ocean points medium or high quality + mask_seaice = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 1 + ) # sea ice points medium or high quality + mask_total = mask_seaice | mask_ocean + + signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1) + # print(signal_confidence.shape) + + # return signal_confidence # Add photon rate and background rate to the reader here ATL03.close() @@ -458,26 +509,45 @@ def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): return along_track_dist, elev else: - dF = pd.DataFrame({'heights':heights, 'lons':lons, 'lats':lats, 'signal_confidence':signal_confidence, 'mask_seaice':mask_seaice, - 'delta_time':delta_time, 'along_track_distance':along_track_distance, #'delta_time_granule':delta_time_granule, - 'across_track_distance':across_track_distance,'ph_id_count':ph_id_count})#, - 
#'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) - - dF_seg = pd.DataFrame({'delta_time':delta_time_geolocation, 'segment_dist_x':segment_dist_x, 'segment_length':segment_length, 'segment_id':segment_id, - 'reference_photon_index':reference_photon_index, 'ph_index_beg':ph_index_beg}) + dF = pd.DataFrame( + { + "heights": heights, + "lons": lons, + "lats": lats, + "signal_confidence": signal_confidence, + "mask_seaice": mask_seaice, + "delta_time": delta_time, + "along_track_distance": along_track_distance, #'delta_time_granule':delta_time_granule, + "across_track_distance": across_track_distance, + "ph_id_count": ph_id_count, + } + ) # , + #'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) + + dF_seg = pd.DataFrame( + { + "delta_time": delta_time_geolocation, + "segment_dist_x": segment_dist_x, + "segment_length": segment_length, + "segment_id": segment_id, + "reference_photon_index": reference_photon_index, + "ph_index_beg": ph_index_beg, + } + ) # Filter out high elevation values - print('seg_dist shape ', segment_dist_x.shape) - print('df shape ',dF.shape) + print("seg_dist shape ", segment_dist_x.shape) + print("df shape ", dF.shape) dF = dF[mask_total] - #dF_seg = dF_seg[mask_total] - #print('df[mask] shape ',dF.shape) + # dF_seg = dF_seg[mask_total] + # print('df[mask] shape ',dF.shape) # Reset row indexing - #dF=dF#.reset_index(drop=True) + # dF=dF#.reset_index(drop=True) return dF, dF_seg -def getATL03_height_correction(fileT, beam='gt1r'): + +def getATL03_height_correction(fileT, beam="gt1r"): """ This method returns relevant data for wave estimates from ALT 07 tracks. returns: Pandas data frame @@ -486,25 +556,27 @@ def getATL03_height_correction(fileT, beam='gt1r'): import h5py import pandas as pd + # Open the file - ATL03 = h5py.File(fileT, 'r') + ATL03 = h5py.File(fileT, "r") ### bulk positions and statistics vars_bulk = [ - 'delta_time', # referenc time since equator crossing - 'dem_h', # best giod approxiamtion - ] + "delta_time", # referenc time since equator crossing + "dem_h", # best giod approxiamtion + ] - D_bulk= dict() + D_bulk = dict() for var in vars_bulk: - D_bulk[var] = ATL03[beam+'/geophys_corr/'+var][:] + D_bulk[var] = ATL03[beam + "/geophys_corr/" + var][:] dF_bulk = pd.DataFrame(D_bulk) ATL03.close() return dF_bulk -def getATL07_beam(fileT, beam='gt1r', maxElev=1e6): + +def getATL07_beam(fileT, beam="gt1r", maxElev=1e6): """ This method returns relevant data for wave estimates from ALT 07 tracks. returns: Pandas data frame @@ -513,81 +585,82 @@ def getATL07_beam(fileT, beam='gt1r', maxElev=1e6): import h5py import pandas as pd + # Open the file - ATL07 = h5py.File(fileT, 'r') + ATL07 = h5py.File(fileT, "r") ### bulk positions and statistics vars_bulk = [ - 'longitude', - 'latitude', - 'height_segment_id',# Height segment ID (10 km segments) - 'seg_dist_x' # Along track distance from the equator crossing to the segment center. - ] + "longitude", + "latitude", + "height_segment_id", # Height segment ID (10 km segments) + "seg_dist_x", # Along track distance from the equator crossing to the segment center. + ] - D_bulk= dict() + D_bulk = dict() for var in vars_bulk: - D_bulk[var] = ATL07[beam+'/sea_ice_segments/'+var] + D_bulk[var] = ATL07[beam + "/sea_ice_segments/" + var] dF_bulk = pd.DataFrame(D_bulk) # Nathan says it's the number of seconds since the GPS epoch on midnight Jan. 
6, 1980 - delta_time=ATL07[beam+'/sea_ice_segments/delta_time'][:] + delta_time = ATL07[beam + "/sea_ice_segments/delta_time"][:] # #Add this value to delta time parameters to compute full gps_seconds - atlas_epoch=ATL07['/ancillary_data/atlas_sdp_gps_epoch'][:] - dF_time = get_time_for_track(delta_time,atlas_epoch) - dF_time['delta_time'] = delta_time + atlas_epoch = ATL07["/ancillary_data/atlas_sdp_gps_epoch"][:] + dF_time = get_time_for_track(delta_time, atlas_epoch) + dF_time["delta_time"] = delta_time ### Primary variables of interest - vars = [ - 'across_track_distance', #Across track distance of photons averaged over the sea ice height segment. - 'height_segment_asr_calc', #Computed apparent surface reflectance for the sea ice segment. - 'height_segment_confidence',# # Height segment confidence flag - 'height_segment_fit_quality_flag', # Flag Values: ['-1', '1', '2', '3', '4', '5'] - #Flag Meanings: ['invalid', 'best', 'high', 'med', 'low', 'poor'] - 'height_segment_height', # Beam segment height - 'height_segment_length_seg', # Along track length of segment - 'height_segment_ssh_flag', #Flag for potential leads, 0=sea ice, 1 = sea surface - 'height_segment_surface_error_est', #Error estimate of the surface height - 'height_segment_type',# Flag Values: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] - # Flag Meanings: ['cloud_covered', 'other', 'specular_lead_low_w_bkg', 'specular_lead_low', 'specular_lead_high_w_bkg', 'specular_lead_high', 'dark_lead_smooth_w_bkg', 'dark_lead_smooth' - 'height_segment_w_gaussian', # Width of Gaussian fit - 'height_segment_quality', # Height quality flag, 1 for good fit, 0 for bad - ] - #vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag'] - - D_heights=dict() + "across_track_distance", # Across track distance of photons averaged over the sea ice height segment. + "height_segment_asr_calc", # Computed apparent surface reflectance for the sea ice segment. + "height_segment_confidence", # # Height segment confidence flag + "height_segment_fit_quality_flag", # Flag Values: ['-1', '1', '2', '3', '4', '5'] + # Flag Meanings: ['invalid', 'best', 'high', 'med', 'low', 'poor'] + "height_segment_height", # Beam segment height + "height_segment_length_seg", # Along track length of segment + "height_segment_ssh_flag", # Flag for potential leads, 0=sea ice, 1 = sea surface + "height_segment_surface_error_est", # Error estimate of the surface height + "height_segment_type", # Flag Values: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + # Flag Meanings: ['cloud_covered', 'other', 'specular_lead_low_w_bkg', 'specular_lead_low', 'specular_lead_high_w_bkg', 'specular_lead_high', 'dark_lead_smooth_w_bkg', 'dark_lead_smooth' + "height_segment_w_gaussian", # Width of Gaussian fit + "height_segment_quality", # Height quality flag, 1 for good fit, 0 for bad + ] + # vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag'] + + D_heights = dict() for var in vars: - D_heights[var] = ATL07[beam+'/sea_ice_segments/heights/' +var][:] + D_heights[var] = ATL07[beam + "/sea_ice_segments/heights/" + var][:] dF_heights = pd.DataFrame(D_heights) - vars_env = { - 'mss':'geophysical/height_segment_mss', # Mean sea surface height above WGS-84 reference ellipsoid (range: -105 to 87m), based on the DTU13 model. 
- 't2m':'geophysical/height_segment_t2m',#Temperature at 2m above the displacement height (K) - 'u2m':'geophysical/height_segment_u2m',#Eastward wind at 2m above the displacement height (m/s-1) - 'v2m':'geophysical/height_segment_v2m',#Northward wind at 2m above the displacement height (m/s-1) - 'n_photons_actual':'stats/n_photons_actual', # Number of photons gathered - 'photon_rate':'stats/photon_rate', #photon_rate + "mss": "geophysical/height_segment_mss", # Mean sea surface height above WGS-84 reference ellipsoid (range: -105 to 87m), based on the DTU13 model. + "t2m": "geophysical/height_segment_t2m", # Temperature at 2m above the displacement height (K) + "u2m": "geophysical/height_segment_u2m", # Eastward wind at 2m above the displacement height (m/s-1) + "v2m": "geophysical/height_segment_v2m", # Northward wind at 2m above the displacement height (m/s-1) + "n_photons_actual": "stats/n_photons_actual", # Number of photons gathered + "photon_rate": "stats/photon_rate", # photon_rate } - D_env=dict() - for var,I in vars_env.items(): - D_env[ var] = ATL07[beam+'/sea_ice_segments/' +I][:] + D_env = dict() + for var, I in vars_env.items(): + D_env[var] = ATL07[beam + "/sea_ice_segments/" + I][:] dF_env = pd.DataFrame(D_env) - - #Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) - DF = pd.concat({ 'time': dF_time, 'ref': dF_bulk, 'heights': dF_heights, 'env': dF_env }, axis=1) + # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) + DF = pd.concat( + {"time": dF_time, "ref": dF_bulk, "heights": dF_heights, "env": dF_env}, axis=1 + ) ATL07.close() # Filter out high elevation values - DF = DF[(DF['heights']['height_segment_height'] Date: Mon, 22 Jan 2024 16:36:42 -0500 Subject: [PATCH 2/6] fix: use better name for wrapper --- src/icesat2_tracks/ICEsat2_SI_tools/io.py | 33 ++++++++++--------- .../analysis_db/B01_SL_load_single_file.py | 2 +- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/io.py index f3349744..193229de 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/io.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/io.py @@ -80,7 +80,18 @@ def ID_to_str(ID_name): return IDs[0] + " " + date + " granule: " + IDs[2] -def get_gdf(ATL03_track_name, params_yapc, maximum_height): +def get_atl06p(ATL03_track_name, params_yapc, maximum_height): + """ + This method retrieves the ATL06 data from sliderule and returns a geodataframe. It also applies the corrections and removes the points above the maximum height. If the geodataframe is empty, an exception is raised. + + Parameters: + ATL03_track_name (str): The name of the ATL03 track. + params_yapc (dict): The parameters for the YAPC correction. + maximum_height (float): The maximum height to filter out. + + Returns: + geopandas.GeoDataFrame: The geodataframe containing the ATL06 data. 
+ """ print("Retrieving data from sliderule ...") gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) @@ -122,7 +133,6 @@ def __init__(self, track_name): self.hemis = "SH" else: self.hemis = self.hemis - # self.hemis = hemis self.set() self.track_name_init = track_name @@ -217,12 +227,9 @@ def nsidc_icesat2_get_associated_file( import icesat2_toolkit.utilities AUXILIARY = False - # product='ATL03' DIRECTORY = None FLATTEN = False TIMEOUT = 120 - MODE = 0o775 - # file_list = ['ATL07-01_20210301023054_10251001_005_01'] if build and not (username or password): urs = "urs.earthdata.nasa.gov" @@ -497,11 +504,7 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): mask_total = mask_seaice | mask_ocean signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1) - # print(signal_confidence.shape) - - # return signal_confidence - # Add photon rate and background rate to the reader here ATL03.close() if numpy == True: @@ -521,8 +524,7 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): "across_track_distance": across_track_distance, "ph_id_count": ph_id_count, } - ) # , - #'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) + ) dF_seg = pd.DataFrame( { @@ -781,10 +783,10 @@ def getATL07_height_corrections(fileT, beam="gt1r"): ### Primary variables of interest vars = [ - "height_segment_dac", # - "height_segment_ib", # - "height_segment_lpe", # # - "height_segment_mss", # + "height_segment_dac", + "height_segment_ib", + "height_segment_lpe", + "height_segment_mss", "height_segment_ocean", ] D_heights = dict() @@ -792,7 +794,6 @@ def getATL07_height_corrections(fileT, beam="gt1r"): D_heights[var] = ATL07[beam + "/sea_ice_segments/geophysical/" + var][:] dF_heights = pd.DataFrame(D_heights) - # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) DF = pd.concat( { "time": dF_time, diff --git a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py index 76f0fcc6..8e05ab67 100644 --- a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py +++ b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py @@ -86,7 +86,7 @@ maximum_height = 30 # (meters) maximum height past dem_h correction -gdf = io.get_gdf(ATL03_track_name, params_yapc, maximum_height) +gdf = io.get_atl06p(ATL03_track_name, params_yapc, maximum_height) cdict = dict() From 5186087aa3b05a40c0a0b034b3377e18a0fd50ca Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 15:41:19 -0500 Subject: [PATCH 3/6] refactor: io to iotools and step 1 --- .../ICEsat2_SI_tools/beam_stats.py | 210 +++++++++++------- .../ICEsat2_SI_tools/{io.py => iotools.py} | 0 .../analysis_db/B01_SL_load_single_file.py | 2 +- 3 files changed, 133 insertions(+), 79 deletions(-) rename src/icesat2_tracks/ICEsat2_SI_tools/{io.py => iotools.py} (100%) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index 7a146e8b..3f815e15 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -1,12 +1,13 @@ import numpy as np import pandas as pd import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec -import icesat2_tracks.ICEsat2_SI_tools.io as io_local +import icesat2_tracks.ICEsat2_SI_tools.iotools as io_local import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec -def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx =10): + +def 
derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): """ this method returns a dict of dataframes with the beam statistics Gd is a dict of beam tables or a hdf5 file @@ -16,18 +17,18 @@ def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx =10): """ import h5py - D=dict() + D = dict() for k in all_beams: if isinstance(Gd, dict): - Gi = Gd[k] + Gi = Gd[k] elif isinstance(Gd, h5py.File): - Gi = io_local.get_beam_hdf_store(Gd[k]) + Gi = io_local.get_beam_hdf_store(Gd[k]) else: - print('Gd is neither dict nor hdf5 file') + print("Gd is neither dict nor hdf5 file") break - dd = Gi['h_mean'] - xx = Gi['dist'] + dd = Gi["h_mean"] + xx = Gi["dist"] def get_var(sti): mask = (sti[0] < xx) & (xx <= sti[1]) @@ -39,31 +40,40 @@ def get_N(sti): def get_lat(sti): mask = (sti[0] < xx) & (xx <= sti[1]) - return np.nanmean(Gi['lats'][mask]) + return np.nanmean(Gi["lats"][mask]) - iter_x = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= False)[1,:] + iter_x = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=False + )[1, :] - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) + stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) var_list = np.array(list(map(get_var, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) - N_list = np.array(list(map(get_N, stencil_iter))) + stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) + N_list = np.array(list(map(get_N, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) + stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) lat_list = np.array(list(map(get_lat, stencil_iter))) - # make Dataframe + # make Dataframe df = pd.DataFrame() - df['x'] = iter_x - df['lat'] = lat_list - df['var'] = var_list - df['N'] = N_list * 2* dx / Lmeter - + df["x"] = iter_x + df["lat"] = lat_list + df["var"] = var_list + df["N"] = N_list * 2 * dx / Lmeter + D[k] = df return D -def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name =None): + +def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): """ Plots the beam statistics in a 2 x 2 plot D is a dict of dataframes with the beam statistics @@ -84,63 +94,99 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name =None): # make 2 x 2 plot ax1 = plt.subplot(gs[0, 0]) for k in high_beams: - plt.plot(D[k]['x']/1e3, np.sqrt(D[k]['var']), '.', color= col_dict[k], markersize=4, label=k) + plt.plot( + D[k]["x"] / 1e3, + np.sqrt(D[k]["var"]), + ".", + color=col_dict[k], + markersize=4, + label=k, + ) - plt.title('high beams std', loc='left') - plt.ylabel('segment std log(m)') - - ax1.set_yscale('log') + plt.title("high beams std", loc="left") + plt.ylabel("segment std log(m)") + + ax1.set_yscale("log") ax2 = plt.subplot(gs[1, 0]) for k in high_beams: - Di = D[k]['N'] - Di[Di ==0] =np.nan - plt.plot(D[k]['x']/1e3, D[k]['N'], '.', color= col_dict[k], markersize=4, label=k) + Di = D[k]["N"] + Di[Di == 0] = np.nan + plt.plot( + D[k]["x"] / 1e3, D[k]["N"], ".", color=col_dict[k], markersize=4, label=k + ) - plt.title('high beams N', loc='left') - plt.xlabel('along track distance (km)') - plt.ylabel('Point 
Density (m)') + plt.title("high beams N", loc="left") + plt.xlabel("along track distance (km)") + plt.ylabel("Point Density (m)") ax3 = plt.subplot(gs[0, 1]) for k in low_beams: - plt.plot(D[k]['x']/1e3, np.sqrt(D[k]['var']), '.', color= col_dict[k], markersize=4, label=k) + plt.plot( + D[k]["x"] / 1e3, + np.sqrt(D[k]["var"]), + ".", + color=col_dict[k], + markersize=4, + label=k, + ) + + plt.title("low beams std", loc="left") - plt.title('low beams std', loc='left') - - ax3.set_yscale('log') + ax3.set_yscale("log") ax4 = plt.subplot(gs[1, 1]) for k in low_beams: - Di = D[k]['N'] - Di[Di ==0] =np.nan - plt.plot(D[k]['x']/1e3, D[k]['N'], '.', color= col_dict[k], markersize=4, label=k) + Di = D[k]["N"] + Di[Di == 0] = np.nan + plt.plot( + D[k]["x"] / 1e3, D[k]["N"], ".", color=col_dict[k], markersize=4, label=k + ) - plt.title('low beams N', loc='left') - plt.xlabel('along track distance (km)') - #plt.ylabel('Point density (m)') + plt.title("low beams N", loc="left") + plt.xlabel("along track distance (km)") + # plt.ylabel('Point density (m)') ax5 = plt.subplot(gs[0:2, 2]) lat_shift = 0 for k in low_beams: Di = D[k] - plt.scatter(Di['x']/1e3, Di['lat']+lat_shift, s= np.exp(Di['N'] *5) , marker='.', color= col_dict[k], label=k, alpha = 0.3) + plt.scatter( + Di["x"] / 1e3, + Di["lat"] + lat_shift, + s=np.exp(Di["N"] * 5), + marker=".", + color=col_dict[k], + label=k, + alpha=0.3, + ) lat_shift = lat_shift + 2 for k in high_beams: Di = D[k] - plt.scatter(Di['x']/1e3, Di['lat']+lat_shift, s= np.exp(Di['N'] *5) , marker='.', color= col_dict[k], label=k, alpha = 0.3) + plt.scatter( + Di["x"] / 1e3, + Di["lat"] + lat_shift, + s=np.exp(Di["N"] * 5), + marker=".", + color=col_dict[k], + label=k, + alpha=0.3, + ) lat_shift = lat_shift + 2 - plt.title('Density in space', loc='left') - plt.ylabel('Latitude (deg)') - plt.xlabel('along track distance (km)') + plt.title("Density in space", loc="left") + plt.ylabel("Latitude (deg)") + plt.xlabel("along track distance (km)") plt.legend() plt.show() + ## plot track stats basics for sliderules ATL06 output -def plot_ATL06_track_data( G2, cdict): + +def plot_ATL06_track_data(G2, cdict): """ Plots the beam statistics in a 3 x 3 plot G2 is a GeoDataFrame from SL (ATL06) @@ -157,42 +203,50 @@ def plot_ATL06_track_data( G2, cdict): ax5 = plt.subplot(gs[1, 2]) ax6 = plt.subplot(gs[2, 2]) - for sp in G2['spot'].unique(): - Gc = G2[G2['spot'] == 1] - - Gc['h_mean_gradient'] = np.gradient(Gc['h_mean']) - ts_config = {'marker': '.', 'markersize': 0.2, 'linestyle': 'none', 'color': cdict[sp], 'alpha': 0.3} - hist_confit = {'density': True, 'color': cdict[sp], 'alpha': 0.3} - - ax1.plot(Gc.geometry.y, Gc['h_mean'], **ts_config) - ax2.plot(Gc.geometry.y, Gc['h_mean_gradient'], **ts_config) - ax3.plot(Gc.geometry.y, Gc['n_fit_photons'], **ts_config) - - Gc['h_mean'].plot.hist(ax=ax4, bins=30, **hist_confit) - Gc['h_mean_gradient'].plot.hist(ax=ax5, bins=np.linspace(-5, 5, 30), **hist_confit) - Gc['rms_misfit'].plot.hist(ax=ax6, bins=30, **hist_confit) - - ax1.set_ylabel('h_mean (m)') - ax2.set_ylabel('slope (m/m)') - ax3.set_ylabel('N Photons') - ax3.set_xlabel('Latitude (degree)') + for sp in G2["spot"].unique(): + Gc = G2[G2["spot"] == 1] + + Gc["h_mean_gradient"] = np.gradient(Gc["h_mean"]) + ts_config = { + "marker": ".", + "markersize": 0.2, + "linestyle": "none", + "color": cdict[sp], + "alpha": 0.3, + } + hist_confit = {"density": True, "color": cdict[sp], "alpha": 0.3} + + ax1.plot(Gc.geometry.y, Gc["h_mean"], **ts_config) + ax2.plot(Gc.geometry.y, 
Gc["h_mean_gradient"], **ts_config) + ax3.plot(Gc.geometry.y, Gc["n_fit_photons"], **ts_config) + + Gc["h_mean"].plot.hist(ax=ax4, bins=30, **hist_confit) + Gc["h_mean_gradient"].plot.hist( + ax=ax5, bins=np.linspace(-5, 5, 30), **hist_confit + ) + Gc["rms_misfit"].plot.hist(ax=ax6, bins=30, **hist_confit) + + ax1.set_ylabel("h_mean (m)") + ax2.set_ylabel("slope (m/m)") + ax3.set_ylabel("N Photons") + ax3.set_xlabel("Latitude (degree)") ax1.set_xticklabels([]) ax2.set_xticklabels([]) - ax1.axhline(0, color='k', linestyle='-', linewidth=0.8) - ax2.axhline(0, color='k', linestyle='-', linewidth=0.8) + ax1.axhline(0, color="k", linestyle="-", linewidth=0.8) + ax2.axhline(0, color="k", linestyle="-", linewidth=0.8) - ax1.set_title('Height', loc='left') - ax2.set_title('Slope', loc='left') - ax3.set_title('Photons per extend', loc='left') + ax1.set_title("Height", loc="left") + ax2.set_title("Slope", loc="left") + ax3.set_title("Photons per extend", loc="left") - ax4.set_title('Histograms', loc='left') - ax5.set_title('Histograms', loc='left') + ax4.set_title("Histograms", loc="left") + ax5.set_title("Histograms", loc="left") - ax6.set_title('Error Hist.', loc='left') - ax6.set_xlabel('rms_misfit (m)') + ax6.set_title("Error Hist.", loc="left") + ax6.set_xlabel("rms_misfit (m)") for axi in [ax4, ax5, ax6]: - axi.set_ylabel('') + axi.set_ylabel("") - return [ax1, ax2, ax3, ax4, ax5, ax6] \ No newline at end of file + return [ax1, ax2, ax3, ax4, ax5, ax6] diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py similarity index 100% rename from src/icesat2_tracks/ICEsat2_SI_tools/io.py rename to src/icesat2_tracks/ICEsat2_SI_tools/iotools.py diff --git a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py index 8e05ab67..c656a4f4 100644 --- a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py +++ b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py @@ -18,7 +18,7 @@ ) from icesat2_tracks.ICEsat2_SI_tools import ( sliderule_converter_tools as sct, - io, + iotools as io, beam_stats, ) from icesat2_tracks.local_modules import m_tools_ph3 as MT, m_general_ph3 as M From eca286c251e040a95c709559171afae0ed5189f3 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 15:52:31 -0500 Subject: [PATCH 4/6] fix: iotools in steps 2-5 --- .../analysis_db/A02c_IOWAGA_thredds_prior.py | 5 ++-- .../analysis_db/B02_make_spectra_gFT.py | 4 +-- .../analysis_db/B03_plot_spectra_ov.py | 25 +++++++++++-------- src/icesat2_tracks/analysis_db/B04_angle.py | 2 +- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py b/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py index 8e4fa11e..1fa8068a 100644 --- a/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py +++ b/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py @@ -10,7 +10,7 @@ from siphon.catalog import TDSCatalog from icesat2_tracks.config.IceSAT2_startup import mconfig -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.wave_tools as waves import icesat2_tracks.local_modules.m_tools_ph3 as MT import icesat2_tracks.local_modules.m_general_ph3 as M @@ -420,6 +420,7 @@ def test_nan_frac(imask): except: target_name = "A02_" + track_name + "_hindcast_fail" + def plot_prior(Prior, axx): angle = Prior["incident_angle"][ "value" @@ -531,7 +532,7 @@ def 
plot_prior(Prior, axx): ax1.axis("equal") F.save_pup(path=plot_path, name=plot_name + "_hindcast_prior") -except Exception as e: +except Exception as e: print(e) print("print 2nd figure failed") diff --git a/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py b/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py index 00da0ff5..a9cb6b90 100644 --- a/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py +++ b/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py @@ -14,7 +14,7 @@ import xarray as xr import h5py -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec import time @@ -31,7 +31,7 @@ import tracemalloc -def linear_gap_fill(F,key_lead, key_int): +def linear_gap_fill(F, key_lead, key_int): """ F pd.DataFrame key_lead key in F that determined the independent coordindate diff --git a/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py b/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py index ca7984d2..bae0de4a 100644 --- a/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py +++ b/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py @@ -6,14 +6,19 @@ import numpy as np import xarray as xr from matplotlib.gridspec import GridSpec -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.generalized_FT as gFT import icesat2_tracks.local_modules.m_tools_ph3 as MT from icesat2_tracks.local_modules import m_general_ph3 as M -from icesat2_tracks.config.IceSAT2_startup import mconfig, color_schemes, plt, font_for_print +from icesat2_tracks.config.IceSAT2_startup import ( + mconfig, + color_schemes, + plt, + font_for_print, +) track_name, batch_key, test_flag = io.init_from_input( - sys.argv # TODO: Handle via CLI + sys.argv # TODO: Handle via CLI ) # loads standard experiment hemis, batch = batch_key.split("_") @@ -21,7 +26,7 @@ load_file = load_path + "B02_" + track_name plot_path = ( mconfig["paths"]["plot"] + "/" + hemis + "/" + batch_key + "/" + track_name + "/" -) # TODO: Update with pathlib +) # TODO: Update with pathlib MT.mkdirs_r(plot_path) Gk = xr.open_dataset(load_file + "_gFT_k.nc") @@ -481,11 +486,11 @@ def plot_model_eta(D, ax, offset=0, **kargs): dd = Gk_1.gFT_PSD_data.rolling(k=10, min_periods=1, center=True).mean() plt.plot(Gk_1.k, dd, color=col_d[k], linewidth=0.8) # handle the 'All-NaN slice encountered' warning - if np.all(np.isnan(dd.data)): + if np.all(np.isnan(dd.data)): dd_max.append(np.nan) else: dd_max.append(np.nanmax(dd.data)) - + plt.xlim(klim) if lflag: plt.ylabel("$(m/m)^2/k$") @@ -495,11 +500,11 @@ def plot_model_eta(D, ax, offset=0, **kargs): ax11.axvline(k_thresh, linewidth=1, color="gray", alpha=1) ax11.axvspan(k_thresh, klim[-1], color="gray", alpha=0.5, zorder=12) - + if not np.all(np.isnan(dd_max)): - max_vale = np.nanmax(dd_max) - for ax in ax1_list: - ax.set_ylim(0,max_vale * 1.1) + max_vale = np.nanmax(dd_max) + for ax in ax1_list: + ax.set_ylim(0, max_vale * 1.1) ax0 = F.fig.add_subplot(gs[-2:, :]) diff --git a/src/icesat2_tracks/analysis_db/B04_angle.py b/src/icesat2_tracks/analysis_db/B04_angle.py index d4067845..0b1e269b 100644 --- a/src/icesat2_tracks/analysis_db/B04_angle.py +++ b/src/icesat2_tracks/analysis_db/B04_angle.py @@ -14,7 +14,7 @@ import h5py -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import xarray as xr import numpy as np From 
cf2a1d3548f0434b2d009412d9e51165a0055db7 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 16:08:25 -0500 Subject: [PATCH 5/6] refactor: move imports to the top iotools.py module --- .../ICEsat2_SI_tools/iotools.py | 61 +++++-------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py index 193229de..468fb21f 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py @@ -1,5 +1,19 @@ +import os +import re +import json +import warnings +from datetime import datetime +from netrc import netrc +from lxml import etree +from posixpath import join as posixpath_join +from pandas import HDFStore +from pandas.io.pytables import PerformanceWarning +import pandas as pd +import h5py from sliderule import icesat2 from icesat2_tracks.ICEsat2_SI_tools import sliderule_converter_tools as sct +import icesat2_toolkit.utilities +import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS def init_from_input(arguments): @@ -73,8 +87,6 @@ def init_data(ID_name, batch_key, ID_flag, ID_root, prefix="A01b_ID"): def ID_to_str(ID_name): - from datetime import datetime - IDs = ID_name.split("_") date = datetime.strptime(IDs[1], "%Y%m%d").strftime("%Y-%m-%d") return IDs[0] + " " + date + " granule: " + IDs[2] @@ -107,8 +119,6 @@ class case_ID: """docstring for case_ID""" def __init__(self, track_name): - import re - track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" track_name_rx = re.compile(track_name_pattern) @@ -219,12 +229,6 @@ def nsidc_icesat2_get_associated_file( ATL03, (or, ATL10, ATL07, not tested) """ - import netrc - import lxml - import re - import posixpath - import os - import icesat2_toolkit.utilities AUXILIARY = False DIRECTORY = None @@ -307,9 +311,6 @@ def nsidc_icesat2_get_associated_file( def json_load(name, path, verbose=False): - import json - import os - full_name = os.path.join(path, name + ".json") with open(full_name, "r") as ifile: @@ -329,7 +330,6 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name): sd '2019.03.01'- subdirectory on ATLAS file_name 'ATL03_20190301010737_09560204_005_01.h5' - filename in subdirectory """ - import icesat2_toolkit.utilities HOST = ["https://n5eil01u.ecs.nsidc.org", "ATLAS", product_directory, sd, file_name] print("download to:", dpath + "/" + HOST[-1]) @@ -346,15 +346,9 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name): def save_pandas_table(table_dict, name, save_path): - import os - if not os.path.exists(save_path): os.makedirs(save_path) - import warnings - from pandas import HDFStore - from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings("ignore", category=PerformanceWarning) with HDFStore(save_path + "/" + name + ".h5") as store: @@ -363,10 +357,6 @@ def save_pandas_table(table_dict, name, save_path): def load_pandas_table_dict(name, save_path): - import warnings - from pandas import HDFStore - from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings("ignore", category=PerformanceWarning) return_dict = dict() @@ -378,8 +368,6 @@ def load_pandas_table_dict(name, save_path): def get_beam_hdf_store(ATL03_k): - import pandas as pd - DD = pd.DataFrame() # columns = ATL03.keys()) for ikey in ATL03_k.keys(): DD[ikey] = ATL03_k[ikey] @@ -388,17 +376,12 @@ def get_beam_hdf_store(ATL03_k): def 
get_beam_var_hdf_store(ATL03_k, ikey): - import pandas as pd - DD = pd.DataFrame() # columns = ATL03.keys()) DD[ikey] = ATL03_k[ikey] return DD def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): - import os - import h5py - mode = "w" if mode is None else mode if not os.path.exists(path): os.makedirs(path) @@ -420,8 +403,6 @@ def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): def get_time_for_track(delta_time, atlas_epoch): "returns pandas dataframe" - import pandas as pd - import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS # Conversion of delta_time to a calendar date temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) @@ -446,8 +427,6 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): beam key of the iceSAT2 beam. """ # Add in a proper description of the function here - import h5py - import pandas as pd # Open the file ATL03 = h5py.File(fileT, "r") @@ -556,9 +535,6 @@ def getATL03_height_correction(fileT, beam="gt1r"): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL03 = h5py.File(fileT, "r") @@ -585,9 +561,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") @@ -669,9 +642,6 @@ def getATL10_beam(fileT, beam="gt1r", maxElev=1e6): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") @@ -756,9 +726,6 @@ def getATL07_height_corrections(fileT, beam="gt1r"): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") From 24c49962f1f99e015419a42af4f9ed60875bcf8e Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 17:20:56 -0500 Subject: [PATCH 6/6] refactor: iotools.py for better readability and efficiency - Refactor getATL03_beam: Simplified the logic of the function to improve readability and performance - Update case_ID class: Add comments to clarify the purpose and functionality of the class. Also, simplified some of its logic for better performance and readability - Use pathlib where appropriate --- .../ICEsat2_SI_tools/iotools.py | 102 +++++++++--------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py index 468fb21f..e92ee495 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py @@ -1,6 +1,7 @@ import os import re import json +from pathlib import Path import warnings from datetime import datetime from netrc import netrc @@ -108,7 +109,7 @@ def get_atl06p(ATL03_track_name, params_yapc, maximum_height): gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) if gdf.empty: - raise Exception("Empty Geodataframe. No data could be retrieved.") + raise ValueError("Empty Geodataframe. No data could be retrieved.") print("Initial data retrieved") gdf = sct.correct_and_remove_height(gdf, maximum_height) @@ -121,28 +122,30 @@ class case_ID: def __init__(self, track_name): track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" 
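# --- Editor's aside (illustration only, not part of the patch): a minimal,
# runnable sketch of how track_name_pattern decomposes a hemisphere-prefixed
# track ID into the twelve capture groups unpacked below. The sample name
# "NH_20190301_09560205" is hypothetical; unmatched optional groups (time,
# release, version) come back as empty strings from re.findall.
import re

_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?"
hemis, YY, MM, DD, HH, MN, SS, TRK, CYC, GRN, RL, VRS = (
    re.compile(_pattern).findall("NH_20190301_09560205").pop()
)
assert (hemis, YY, MM, DD) == ("NH", "2019", "03", "01")  # hemisphere + date block
assert (TRK, CYC, GRN) == ("0956", "02", "05")  # track, cycle, granule block
assert (HH, MN, SS, RL, VRS) == ("", "", "", "", "")  # optional groups stay empty
# --- end editor's aside ---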
From 24c49962f1f99e015419a42af4f9ed60875bcf8e Mon Sep 17 00:00:00 2001
From: Carlos Paniagua
Date: Tue, 30 Jan 2024 17:20:56 -0500
Subject: [PATCH 6/6] refactor: iotools.py for better readability and efficiency

- Refactor getATL03_beam: simplify the function's logic to improve
  readability and performance
- Update case_ID class: add comments clarifying the class's purpose and
  functionality, and simplify some of its logic for better performance
  and readability
- Use pathlib where appropriate
---
 .../ICEsat2_SI_tools/iotools.py | 102 +++++++++---------
 1 file changed, 48 insertions(+), 54 deletions(-)

diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py
index 468fb21f..e92ee495 100644
--- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py
+++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py
@@ -1,6 +1,7 @@
 import os
 import re
 import json
+from pathlib import Path
 import warnings
 from datetime import datetime
 from netrc import netrc
@@ -108,7 +109,7 @@ def get_atl06p(ATL03_track_name, params_yapc, maximum_height):
     gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name])
 
     if gdf.empty:
-        raise Exception("Empty Geodataframe. No data could be retrieved.")
+        raise ValueError("Empty Geodataframe. No data could be retrieved.")
 
     print("Initial data retrieved")
     gdf = sct.correct_and_remove_height(gdf, maximum_height)
@@ -121,28 +122,30 @@ class case_ID:
 
     def __init__(self, track_name):
         track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?"
+        # Compile the regular expression pattern for track names
         track_name_rx = re.compile(track_name_pattern)
+
+        # Use the compiled regular expression to find all matches in the track name
+        # The pop() method is used to get the last (or only) match
+        # The result is a tuple, which is unpacked into several properties of the current object
         (
-            self.hemis,
-            self.YY,
-            self.MM,
-            self.DD,
-            self.HH,
-            self.MN,
-            self.SS,
-            self.TRK,
-            self.CYC,
-            self.GRN,
-            self.RL,
-            self.VRS,
+            self.hemis,  # Hemisphere
+            self.YY,  # Year
+            self.MM,  # Month
+            self.DD,  # Day
+            self.HH,  # Hour
+            self.MN,  # Minute
+            self.SS,  # Second
+            self.TRK,  # Track
+            self.CYC,  # Cycle
+            self.GRN,  # Granule
+            self.RL,  # Release
+            self.VRS,  # Version
         ) = track_name_rx.findall(track_name).pop()
 
-        if self.hemis == "01":
-            self.hemis = "NH"
-        elif self.hemis == "02":
-            self.hemis = "SH"
-        else:
-            self.hemis = self.hemis
+        hemis_map = {"01": "NH", "02": "SH"}
+        self.hemis = hemis_map.get(self.hemis, self.hemis)
+
         self.set()
         self.track_name_init = track_name
@@ -346,12 +349,12 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name):
 
 
 def save_pandas_table(table_dict, name, save_path):
-    if not os.path.exists(save_path):
-        os.makedirs(save_path)
+    save_path = Path(save_path)
+    save_path.mkdir(parents=True, exist_ok=True)
 
     warnings.filterwarnings("ignore", category=PerformanceWarning)
 
-    with HDFStore(save_path + "/" + name + ".h5") as store:
+    with HDFStore(save_path / f"{name}.h5") as store:
         for name, table in table_dict.items():
             store[name] = table
@@ -382,23 +385,18 @@ def get_beam_var_hdf_store(ATL03_k, ikey):
 
 def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"):
-    mode = "w" if mode is None else mode
-    if not os.path.exists(path):
-        os.makedirs(path)
+    path = Path(path)
+    path.mkdir(parents=True, exist_ok=True)
 
-    full_name = os.path.join(path, name + ".h5")
-    store = h5py.File(full_name, mode)
-
-    for k in data_dict.keys():
-        store1 = store.create_group(k)
-        for kk, I in list(data_dict[k].items()):
-            store1[kk] = I
-        # store1.close()
-
-    store.close()
+    full_name = path / (name + ".h5")
+    with h5py.File(str(full_name), mode) as store:
+        for k in data_dict.keys():
+            store1 = store.create_group(k)
+            for kk, I in list(data_dict[k].items()):
+                store1[kk] = I
 
     if verbose:
-        print("saved at: " + full_name)
+        print(f"saved at: {full_name}")
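Because Path(save_path) normalizes its argument, save_pandas_table and write_track_to_HDF5 now accept either strings or Path objects with no caller changes. A hedged round-trip sketch (paths and table keys are illustrative; load_pandas_table_dict is assumed to read back what save_pandas_table wrote):

    import pandas as pd

    from icesat2_tracks.ICEsat2_SI_tools import iotools as io

    tables = {"gt1l": pd.DataFrame({"heights": [1.0, 2.0, 3.0]})}
    io.save_pandas_table(tables, "B01_demo", "/tmp/tables")  # writes /tmp/tables/B01_demo.h5
    loaded = io.load_pandas_table_dict("B01_demo", "/tmp/tables")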
@@ -452,7 +450,6 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6):
 
     # Photon height
     heights = ATL03[beam + "/heights/h_ph"][:]
-    # print(heights.shape)
 
     # Flag for signal confidence
     # column index: 0=Land; 1=Ocean; 2=SeaIce; 3=LandIce; 4=InlandWater
@@ -464,25 +461,25 @@
     # -- 3: medium
     # -- 4: high
 
-    mask_ocean = (
-        ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 2
-    )  # ocean points medium or high quality
-    mask_seaice = (
-        ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 2
-    )  # sea ice points medium or high quality
+    heights_signal_conf_ph = "/heights/signal_conf_ph"
+    quality_threshold = 2
+    beam_data = ATL03[beam + heights_signal_conf_ph]
+
+    # ocean points medium or high quality
+    mask_ocean = beam_data[:, 1] > quality_threshold
+    # sea ice points medium or high quality
+    mask_seaice = beam_data[:, 2] > quality_threshold
     mask_total = mask_seaice | mask_ocean
 
-    if sum(~mask_total) == (ATL03[beam + "/heights/signal_conf_ph"][:, 1]).size:
+    if sum(~mask_total) == beam_data[:, 1].size:
         print("zero photons, lower photon quality to 2 or higher")
-        mask_ocean = (
-            ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 1
-        )  # ocean points medium or high quality
-        mask_seaice = (
-            ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 1
-        )  # sea ice points medium or high quality
+        # lower quality threshold and recompute
+        quality_threshold = 1
+        mask_ocean = beam_data[:, 1] > quality_threshold
+        mask_seaice = beam_data[:, 2] > quality_threshold
         mask_total = mask_seaice | mask_ocean
 
-    signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1)
+    signal_confidence = ATL03[beam + heights_signal_conf_ph][:, 1:3].max(1)
 
     ATL03.close()
 
@@ -600,7 +597,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6):
         "height_segment_w_gaussian",  # Width of Gaussian fit
         "height_segment_quality",  # Height quality flag, 1 for good fit, 0 for bad
     ]
-    # vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag']
 
     D_heights = dict()
     for var in vars:
@@ -621,7 +617,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6):
         D_env[var] = ATL07[beam + "/sea_ice_segments/" + I][:]
 
     dF_env = pd.DataFrame(D_env)
-    # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0)
 
     DF = pd.concat(
         {"time": dF_time, "ref": dF_bulk, "heights": dF_heights, "env": dF_env}, axis=1
     )
@@ -646,8 +641,6 @@ def getATL10_beam(fileT, beam="gt1r", maxElev=1e6):
     ATL07 = h5py.File(fileT, "r")
 
     ### bulk positions and statistics
-    # f['gt1r/freeboard_beam_segment/beam_freeboard'].keys()
-
     vars_bulk = [
         "seg_dist_x",