making surface height calc failsafe
+ caught exception when there's insufficient data for resample
+ set default values of station_config when no config file is found
+ better adjustment of height in join_l3 when there's overlap between old and new data
BaptisteVandecrux committed Aug 14, 2024
1 parent 8098629 commit bd1a8ce
Showing 5 changed files with 98 additions and 42 deletions.
50 changes: 31 additions & 19 deletions src/pypromice/process/L2toL3.py
@@ -105,7 +105,11 @@ def toL3(L2, station_config={}, T_0=273.15):
ds[var.replace('gps_','')] = ('time', gps_coordinate_postprocessing(ds, var, station_config))

# processing continuous surface height, ice surface height, snow height
ds = process_surface_height(ds, station_config)
try:
ds = process_surface_height(ds, station_config)
except Exception as e:
logger.error("Error processing surface height at %s"%station_config['stid'])
logging.error(e, exc_info=True)

# making sure dataset has the attributes contained in the config files
ds.attrs['project'] = station_config['project']
@@ -168,28 +172,36 @@ def process_surface_height(ds, station_config={}):

# Convert to dataframe and combine surface height variables
df_in = ds[[v for v in ['z_surf_1', 'z_surf_2', 'z_ice_surf'] if v in ds.data_vars]].to_dataframe()

(ds['z_surf_combined'], ds['z_ice_surf'],
ds['z_surf_1_adj'], ds['z_surf_2_adj'])= combine_surface_height(df_in, ds.attrs['site_type'])
ds['z_surf_1_adj'], ds['z_surf_2_adj']) = combine_surface_height(df_in, ds.attrs['site_type'])


if ds.attrs['site_type'] == 'ablation':
# Calculate rolling minimum for ice surface height and snow height
ts_interpolated = np.minimum(xr.where(ds.z_ice_surf.notnull(),ds.z_ice_surf,ds.z_surf_combined),
ds.z_surf_combined).to_series().resample('H').interpolate(limit=72)

# Apply the rolling window with median calculation
z_ice_surf = (ts_interpolated
.rolling('14D', center=True, min_periods=1)
.median())

# Overprint the first and last 7 days with interpolated values
# because of edge effect of rolling windows
z_ice_surf.iloc[:24*7] = (ts_interpolated.iloc[:24*7]
.rolling('1D', center=True, min_periods=1)
.median().values)
z_ice_surf.iloc[-24*7:] = (ts_interpolated.iloc[-24*7:]
.rolling('1D', center=True, min_periods=1)
.median().values)

ts_interpolated = np.minimum(
xr.where(ds.z_ice_surf.notnull(),
ds.z_ice_surf,ds.z_surf_combined),
ds.z_surf_combined).to_series().resample('H').interpolate(limit=72)

if len(ts_interpolated)>24*7:
# Apply the rolling window with median calculation
z_ice_surf = (ts_interpolated
.rolling('14D', center=True, min_periods=1)
.median())
# Overprint the first and last 7 days with interpolated values
# because of edge effect of rolling windows
z_ice_surf.iloc[:24*7] = (ts_interpolated.iloc[:24*7]
.rolling('1D', center=True, min_periods=1)
.median().values)
z_ice_surf.iloc[-24*7:] = (ts_interpolated.iloc[-24*7:]
.rolling('1D', center=True, min_periods=1)
.median().values)
else:
z_ice_surf = (ts_interpolated
.rolling('1D', center=True, min_periods=1)
.median())

z_ice_surf = z_ice_surf.loc[ds.time]
# here we make sure that the periods where both z_stake and z_pt are
# missing are also missing in z_ice_surf
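
For context, the 14-day rolling median above is only meaningful when more than a week of hourly data is available, which is what the new length check guards against. The following standalone sketch (hypothetical data, not part of the commit) reproduces that guarded pattern on a synthetic hourly series, including the 1-day-window overwrite of the first and last week that limits the rolling-window edge effect; it assumes pandas >= 1.3 for centred time-based rolling windows.

import numpy as np
import pandas as pd

# Hypothetical hourly series standing in for ts_interpolated (30 days of data)
idx = pd.date_range("2024-01-01", periods=24 * 30, freq="h")
ts_interpolated = pd.Series(np.linspace(0.0, -0.5, len(idx)), index=idx)

if len(ts_interpolated) > 24 * 7:
    # Enough data: 14-day centred rolling median, as in the commit
    z_ice_surf = ts_interpolated.rolling("14D", center=True, min_periods=1).median()
    # Overwrite the first and last week with a 1-day window to limit edge effects
    z_ice_surf.iloc[:24 * 7] = (ts_interpolated.iloc[:24 * 7]
                                .rolling("1D", center=True, min_periods=1)
                                .median().values)
    z_ice_surf.iloc[-24 * 7:] = (ts_interpolated.iloc[-24 * 7:]
                                 .rolling("1D", center=True, min_periods=1)
                                 .median().values)
else:
    # Too short for a 14-day window: use the 1-day window everywhere
    z_ice_surf = ts_interpolated.rolling("1D", center=True, min_periods=1).median()
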
17 changes: 15 additions & 2 deletions src/pypromice/process/get_l2tol3.py
@@ -50,8 +50,21 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata):

# importing station_config (dict) from config_folder (str path)
config_file = config_folder+l2.attrs['station_id']+'.toml'
with open(config_file) as fp:
station_config = toml.load(fp)

if os.path.exists(config_file):
# File exists, load the configuration
with open(config_file) as fp:
station_config = toml.load(fp)
else:
# File does not exist, initialize with standard info
# this was preferred by RSF over exiting with error
logger.error("\n***\nNo station_configuration file for %s.\nPlease create one on AWS-L0/metadata/station_configurations.\n***"%l2.attrs['station_id'])
station_config = {"stid":l2.attrs['station_id'],
"station_site":l2.attrs['station_id'],
"project": "PROMICE",
"location_type": "ice sheet",
}

# Perform Level 3 processing
l3 = toL3(l2, station_config)

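
The fallback above avoids aborting L3 processing when a station has no configuration TOML. A minimal, hypothetical helper sketching the same behaviour (the function name load_station_config, the folder path, and the station id are illustrative, not part of pypromice):

import logging
import os

import toml

logger = logging.getLogger(__name__)

def load_station_config(config_folder, station_id):
    # Hypothetical helper mirroring the fallback in get_l2tol3
    config_file = os.path.join(config_folder, station_id + ".toml")
    if os.path.exists(config_file):
        with open(config_file) as fp:
            return toml.load(fp)
    # No configuration file found: warn and fall back to a default PROMICE entry
    logger.error("No station_configuration file for %s", station_id)
    return {
        "stid": station_id,
        "station_site": station_id,
        "project": "PROMICE",
        "location_type": "ice sheet",
    }

# Hypothetical folder and station id, for illustration only
station_config = load_station_config("metadata/station_configurations/", "XXX_U")
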
46 changes: 37 additions & 9 deletions src/pypromice/process/join_l3.py
@@ -154,12 +154,15 @@ def loadArr(infile, isNead):

def align_surface_heights(data_series_new, data_series_old):
"""
Align two surface height time series based on the gap between their end and start.
Align two surface height time series based on the gap between their end and
start.
If the gap between the end of `data_series_old` and the start of `data_series_new` is less than a week,
the function aligns them based on the median value of the last week of `data_series_old` and the first
week of `data_series_new`. If the gap is larger than a week, it aligns them using a linear fit.
If there is overlap, the function uses the overlapping period to adjust the newer time series.
If the gap between the end of `data_series_old` and the start of `data_series_new`
is less than a week, the function aligns them based on the median value of
the last week of `data_series_old` and the first week of `data_series_new`.
If the gap is larger than a week, it aligns them using a linear fit. If
there is overlap, the function uses the overlapping period to adjust the
newer time series.
Parameters
----------
@@ -181,15 +184,22 @@ def align_surface_heights(data_series_new, data_series_old):
if first_new_idx <= last_old_idx:
# Find the overlapping period
overlap_start = first_new_idx
overlap_end = min(last_old_idx, data_series_new.last_valid_index())
overlap_end = min(last_old_idx, overlap_start+pd.to_timedelta('7D'))

# Compute the median values for the overlapping period
overlap_old = data_series_old[overlap_start:overlap_end].median()
overlap_new = data_series_new[overlap_start:overlap_end].median()

if np.isnan(overlap_old) or np.isnan(overlap_new):
overlap_end = min(last_old_idx, data_series_new.last_valid_index())

# Compute the median values for the overlapping period
overlap_old = data_series_old[overlap_start:overlap_end].median()
overlap_new = data_series_new[overlap_start:overlap_end].median()

# Align based on the overlapping median values
data_series_new = data_series_new - overlap_new + overlap_old

elif (first_new_idx - last_old_idx).days <= 7:
# Compute the median of the last week of data in the old series
last_week_old = data_series_old[last_old_idx - pd.Timedelta(weeks=1):last_old_idx].median()
@@ -232,6 +242,7 @@ def build_station_list(config_folder: str, target_station_site: str) -> list:
"""
station_info_list = [] # Initialize an empty list to store station information

found_as_station = False
for filename in os.listdir(config_folder):
if filename.endswith(".toml"):
file_path = os.path.join(config_folder, filename)
Expand All @@ -242,10 +253,25 @@ def build_station_list(config_folder: str, target_station_site: str) -> list:
stid = data.get("stid") # Get the station ID

# Check if the station site matches the target and stid is unique
if stid == target_station_site:
found_as_station = True
if station_site == target_station_site and stid:
station_info = data.copy() # Copy all attributes from the TOML file
station_info_list.append(station_info) # Add the station info to the list


if len(station_info_list)==0 and not found_as_station:
logger.error('\n***\nNo station_configuration file found for %s.\nProcessing it as a single-station PROMICE site.\n***'%target_station_site)
station_info = {
"stid": target_station_site,
"station_site": target_station_site,
"project": "PROMICE",
"location_type": "ice sheet",
}
station_info_list.append(station_info)
elif len(station_info_list)==0 :
logger.error('\n***\nThe name \"%s\" passed to join_l3 is a station name and not a site name (e.g. SCO_Lv3 instead of SCO_L). Please provide a site name that is named at least once in the "station_site" attribute of the station configuration files.\n***'%target_station_site)

return station_info_list

def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, metadata):
@@ -264,7 +290,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
filepath = os.path.join(folder_gcnet, stid+'.csv')
isNead = True
if not os.path.isfile(filepath):
logger.info(stid+' was listed as station but could not be found in '+folder_l3+' nor '+folder_gcnet)
logger.error('\n***\n'+stid+' was listed as station but could not be found in '+folder_l3+' nor '+folder_gcnet+'\n***')
continue

l3, _ = loadArr(filepath, isNead)
Expand All @@ -283,6 +309,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
logger.info('joining %s' % ' '.join(sorted_stids))

l3_merged = None

for l3, station_info in sorted_list_station_data:
stid = station_info["stid"]

@@ -346,7 +373,8 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me

# Assign site id
if not l3_merged:
logger.error('No level 2 data file found for '+site)
logger.error('No level 3 station data file found for '+site)
return None, sorted_list_station_data
l3_merged.attrs['site_id'] = site
l3_merged.attrs['stations'] = ' '.join(sorted_stids)
l3_merged.attrs['level'] = 'L3'
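
The revised overlap handling in align_surface_heights prefers the first week of overlap for computing the offset between the two series, widening the window only when that week holds no valid data. A rough standalone sketch of that idea on synthetic daily series (illustrative only, not the actual function):

import numpy as np
import pandas as pd

# Two hypothetical overlapping daily surface-height series
old = pd.Series(np.linspace(0.0, -1.0, 30),
                index=pd.date_range("2024-05-01", periods=30, freq="D"))
new = pd.Series(np.linspace(2.0, 1.0, 30),
                index=pd.date_range("2024-05-20", periods=30, freq="D"))

overlap_start = new.first_valid_index()
# Prefer a one-week overlap window for the adjustment
overlap_end = min(old.last_valid_index(), overlap_start + pd.to_timedelta("7D"))
offset_old = old[overlap_start:overlap_end].median()
offset_new = new[overlap_start:overlap_end].median()

if np.isnan(offset_old) or np.isnan(offset_new):
    # Fall back to the full overlapping period if the first week has no valid data
    overlap_end = min(old.last_valid_index(), new.last_valid_index())
    offset_old = old[overlap_start:overlap_end].median()
    offset_new = new[overlap_start:overlap_end].median()

# Shift the newer series so the two agree over the overlap
new_adjusted = new - offset_new + offset_old
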
25 changes: 14 additions & 11 deletions src/pypromice/process/write.py
@@ -32,6 +32,9 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
if resample:
d2 = resample_dataset(dataset, time)
logger.info('Resampling to '+str(time))
if len(d2.time) == 1:
logger.warning('Output of resample has length 1. Not enough data to calculate daily/monthly average.')
return None
else:
d2 = dataset.copy()

@@ -275,17 +278,17 @@ def addMeta(ds, meta):
# https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3

# Determine the temporal resolution
time_diff = pd.Timedelta((ds['time'][1] - ds['time'][0]).values)
if time_diff == pd.Timedelta('10min'):
sample_rate = "10min"
elif time_diff == pd.Timedelta('1H'):
sample_rate = "hourly"
elif time_diff == pd.Timedelta('1D'):
sample_rate = "daily"
elif 28 <= time_diff.days <= 31:
sample_rate = "monthly"
else:
sample_rate = "unknown_sample_rate"
sample_rate = "unknown_sample_rate"
if len(ds['time'])>1:
time_diff = pd.Timedelta((ds['time'][1] - ds['time'][0]).values)
if time_diff == pd.Timedelta('10min'):
sample_rate = "10min"
elif time_diff == pd.Timedelta('1H'):
sample_rate = "hourly"
elif time_diff == pd.Timedelta('1D'):
sample_rate = "daily"
elif 28 <= time_diff.days <= 31:
sample_rate = "monthly"

if 'station_id' in ds.attrs.keys():
ds.attrs['id'] = 'dk.geus.promice.station.' + ds.attrs['station_id']+'.'+sample_rate
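
The reworked block above defaults to "unknown_sample_rate" and only inspects the first time step when at least two timestamps exist, so single-row datasets no longer fail. A small, hypothetical sketch of the same logic wrapped in a helper (infer_sample_rate is illustrative, not part of pypromice):

import pandas as pd
import xarray as xr

def infer_sample_rate(ds):
    # Hypothetical helper mirroring the guarded logic in addMeta
    sample_rate = "unknown_sample_rate"
    if len(ds["time"]) > 1:
        time_diff = pd.Timedelta((ds["time"][1] - ds["time"][0]).values)
        if time_diff == pd.Timedelta("10min"):
            sample_rate = "10min"
        elif time_diff == pd.Timedelta("1h"):
            sample_rate = "hourly"
        elif time_diff == pd.Timedelta("1D"):
            sample_rate = "daily"
        elif 28 <= time_diff.days <= 31:
            sample_rate = "monthly"
    return sample_rate

# A single-timestamp dataset falls back to "unknown_sample_rate"
ds = xr.Dataset(coords={"time": pd.date_range("2024-08-01", periods=1, freq="h")})
print(infer_sample_rate(ds))
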
2 changes: 1 addition & 1 deletion src/pypromice/qc/persistence.py
@@ -82,7 +82,7 @@ def persistence_qc(
if variable_thresholds is None:
variable_thresholds = DEFAULT_VARIABLE_THRESHOLDS

logger.info(f"Running persistence_qc using {variable_thresholds}")
logger.debug(f"Running persistence_qc using {variable_thresholds}")

for k in variable_thresholds.keys():
if k in ["t", "p", "rh", "wspd", "wdir", "z_boom"]:
