From f4a358fb01a0dc648f02bc6577f137d0b8c6dadb Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Thu, 27 Jun 2024 11:20:48 +0200 Subject: [PATCH 1/5] adding surface height processing functions --- src/pypromice/process/L2toL3.py | 652 ++++++++++++++++++++++++++++++++ 1 file changed, 652 insertions(+) diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 0db1dffb..a1b91186 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -6,6 +6,9 @@ import numpy as np import xarray as xr from statsmodels.nonparametric.smoothers_lowess import lowess +from sklearn.linear_model import LinearRegression +from pypromice.qc.github_data_issues import adjustData +from scipy.interpolate import interp1d import logging logger = logging.getLogger(__name__) @@ -14,6 +17,8 @@ def toL3(L2, T_0=273.15): derived variables: - Turbulent fluxes - smoothed and inter/extrapolated GPS coordinates + - continuous surface height, ice surface height, snow height + - thermistor depths Parameters @@ -78,8 +83,655 @@ def toL3(L2, T_0=273.15): # adding average coordinate as attribute for v in ['lat','lon','alt']: ds.attrs[v+'_avg'] = ds[v].mean(dim='time').item() + + # processing continuous surface height, ice surface height, snow height + ds = surfaceHeightProcessing(ds) + + return ds + +def surfaceHeightProcessing(ds): + # creating surface height variables + ds['z_surf_1'] = ('time', ds['z_boom_u'].data*np.nan) + ds['z_surf_2'] = ('time', ds['z_boom_u'].data*np.nan) + + if ds.attrs['site_type'] == 'ablation': + ds['z_surf_1'] = 2.6 - ds['z_boom_u'] + first_valid_index = ds.time.where(ds.z_stake.notnull(),drop=True).data[0] + if first_valid_index: + ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] + + if 'z_pt_cor' in ds.data_vars: + ds['z_ice_surf'] = ('time', ds['z_pt_cor'].data) + + else: + first_valid_index = ds.time.where(ds.z_boom_u.notnull(),drop=True).data[0] + ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u'] + if 'z_boom_l' in ds.data_vars: + first_valid_index = ds.time.where(ds.z_boom_l.notnull(),drop=True).data[0] + ds['z_surf_2'] = ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'] + if 'z_stake' in ds.data_vars: + if ds.z_stake.notnull().any(): + first_valid_index = ds.time.where(ds.z_stake.notnull(),drop=True).data[0] + ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] + + + ds = adjustData(ds, var_list=['z_surf_1', 'z_surf_2','z_ice_surf']) + + df_in = ds[[v for v in ['z_surf_1', 'z_surf_2','z_ice_surf'] if v in ds.data_vars]].to_dataframe() + + ds['z_surf_combined'], ds['z_ice_surf'] = combineSurfaceHeight(df_in, ds.attrs['site_type']) + + if ds.attrs['site_type'] == 'ablation': + ds['z_ice_surf'] = ds.z_surf_combined.to_series().rolling('365D').min() + ds['snow_height'] = ds['z_surf_combined'] -ds['z_ice_surf'] + elif ds.attrs['site_type'] == 'accumulation': + ds['z_ice_surf'] = ('time', ds['z_surf_1'].data*np.nan) + ds['snow_height'] = ds['z_surf_combined'] + elif ds.attrs['site_type'] == 'bedrock': + ds['z_ice_surf'] = ('time', ds['z_surf_1'].data*np.nan) + ds['snow_height'] = ds['z_surf_combined'] + else: + logger.info('other site type') + + if ds.attrs['site_type'] != 'bedrock': + ice_temp_vars = [v for v in ds.data_vars if 't_i_' in v] + vars_out = [v.replace('t','d_t') for v in ice_temp_vars] + vars_out.append('t_i_10m') + df_out = thermistorDepth( + ds[ice_temp_vars+['z_surf_combined']].to_dataframe(), + 
ds.attrs['station_id'])
+        for var in df_out.columns:
+            ds[var] = ('time', df_out[var].values)
+
+    return ds
+
+def combineSurfaceHeight(df, site_type, threshold_ablation = -0.0002):
+    '''Combines the data from three sensors: the two sonic rangers and the
+    pressure transducer, to recreate the surface height, the ice surface height
+    and the snow depth through the years. For the accumulation sites, it is
+    only the average of the two sonic rangers (after manual adjustments to
+    correct maintenance shifts). For the ablation sites, first an ablation
+    period is estimated each year (either the period when z_pt_cor decreases
+    or JJA if no better estimate), then different adjustments are conducted
+    to stitch the three time series together: z_ice_surf (adjusted from
+    z_pt_cor) or, if unavailable, z_surf_2 (adjusted from z_stake)
+    is used in the ablation period, while an average of z_surf_1 and z_surf_2
+    is used otherwise, after being adjusted to z_ice_surf at the end
+    of the ablation season.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Dataframe with datetime index and variables z_surf_1, z_surf_2 and z_ice_surf
+    site_type : str
+        Either 'accumulation' or 'ablation'
+    threshold_ablation : float
+        Threshold to which a z_pt_cor hourly decrease is compared. If the decrease
+        is larger (i.e. more negative) than this threshold, then there is ablation.
+    '''
+    logger.info('Combining surface height')
+    if 'z_surf_2' not in df.columns:
+        logger.info('-> did not find z_surf_2')
+        df["z_surf_combined"] = hampel(df["z_surf_1"].interpolate(limit=72)).values
+        return df["z_surf_combined"], df["z_surf_combined"]*np.nan
+
+    elif site_type == 'accumulation':
+        logger.info('-> no z_pt or accumulation site: averaging z_surf_1 and z_surf_2')
+        df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72)).values
+        df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72)).values
+        # adjusting z_surf_2 to z_surf_1
+        df["z_surf_2_adj"] = df["z_surf_2_adj"] + (df["z_surf_1_adj"]- df["z_surf_2_adj"]).mean()
+        # z_surf_combined is the average of the two z_surf
+        if df.z_surf_1_adj.notnull().any() & df.z_surf_2_adj.notnull().any():
+            df['z_surf_combined'] = np.nanmean(
+                df[['z_surf_1_adj', 'z_surf_2_adj']].values, axis = 1)
+        elif df.z_surf_1_adj.notnull().any():
+            df['z_surf_combined'] = df.z_surf_1_adj.values
+        elif df.z_surf_2_adj.notnull().any():
+            df['z_surf_combined'] = df.z_surf_2_adj.values
+
+        df["z_surf_combined"] = hampel(df["z_surf_combined"].interpolate(limit=72)).values
+        return df["z_surf_combined"], df["z_surf_combined"]*np.nan
+
+    else:
+        logger.info('-> ablation site')
+        # smoothing and filtering pressure transducer data
+        df["z_ice_surf_adj"] = hampel(df["z_ice_surf"].interpolate(limit=72)).values
+        df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72)).values
+        df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72)).values
+
+        # defining ice ablation period from the decrease of a smoothed version of z_pt
+        # meaning when smoothed_z_pt.diff() < threshold_ablation
+        # first smoothing
+        smoothed_PT = (df['z_ice_surf']
+                       .resample('H')
+                       .interpolate(limit=72)
+                       .rolling('14D',center=True, min_periods=1)
+                       .mean())
+        # second smoothing
+        smoothed_PT = smoothed_PT.rolling('14D', center=True, min_periods=1).mean()
+
+        smoothed_PT = smoothed_PT.reindex(df.index,method='ffill')
+        smoothed_PT.loc[df.z_ice_surf.isnull()] = np.nan
+
+        # logical index where ablation is detected
+        ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation,
+                                      np.isin(smoothed_PT.diff().index.month, [6,
+                                               7, 8, 9]))
+
+        # finding the beginning and end of each period with True
+        idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2)
+        idx[:, 1] -= 1
+        # fill small gaps in the ice ablation periods.
+        for i in range(len(idx)-1):
+            ind = idx[i]
+            ind_next = idx[i+1]
+            # if the end of an ablation period is less than 15 days away from
+            # the next ablation, then it is still considered as the same ablation
+            # season
+            if df.index[ind_next[0]]-df.index[ind[1]] < pd.to_timedelta('15D'):
+                ind_ablation[ind[1]:ind_next[0]] = True
+
+        # interpolated versions of the adjusted sonic ranger and pressure transducer data
+        hs1 = df["z_surf_1_adj"].interpolate(limit=72).copy()
+        hs2 = df["z_surf_2_adj"].interpolate(limit=72).copy()
+        z = df["z_ice_surf_adj"].interpolate(limit=72).copy()
+
+        if ((z.first_valid_index() - hs1.first_valid_index()) <= pd.to_timedelta('251D')
+                and (z.first_valid_index() - hs1.first_valid_index()) > pd.to_timedelta('0H')):
+            # if the pressure transducer is installed the year after then
+            # we use the mean surface height 1 on its first week as a 0
+            # for the ice height
+            z = z - z.loc[
+                z.first_valid_index():(z.first_valid_index()+pd.to_timedelta('14D'))
+                ].mean() + hs1.iloc[:24*7].mean()
+        else:
+            # if there is more than a year (actually 251 days) between the
+            # initiation of the AWS and the installation of the pressure transducer
+            # we remove the intercept in the pressure transducer data.
+            # Removing the intercept
+            # means that we consider the ice surface height at 0 when the AWS
+            # is installed, and not when the pressure transducer is installed.
+            Y = z.iloc[:].values.reshape(-1, 1)
+            X = z.iloc[~np.isnan(Y)].index.astype(np.int64).values.reshape(-1, 1)
+            Y = Y[~np.isnan(Y)]
+            linear_regressor = LinearRegression()
+            linear_regressor.fit(X, Y)
+            Y_pred = linear_regressor.predict(z.index.astype(np.int64).values.reshape(-1, 1) )
+            z = z-Y_pred[0]
+
+        years = df.index.year.unique().values
+        ind_start = years.copy()
+        ind_end = years.copy()
+        logger.info('-> estimating ablation period for each year')
+        for i, y in enumerate(years):
+            # for each year
+            ind_yr = df.index.year.values==y
+            ind_abl_yr = np.logical_and(ind_yr, ind_ablation)
+
+            if df.loc[
+                np.logical_and(ind_yr, df.index.month.isin([6,7,8])),
+                "z_ice_surf_adj"].isnull().all():
+
+                ind_abl_yr = np.logical_and(ind_yr, df.index.month.isin([6,7,8]))
+                ind_ablation[ind_yr] = ind_abl_yr[ind_yr]
+                logger.info(y, 'no z_ice_surf, just using JJA')
+
+            if np.any(ind_abl_yr):
+                logger.info(y, 'derived from z_ice_surf')
+                # if there are some ablation flagged for that year
+                # then find beginning and end
+                ind_start[i] = np.argwhere(ind_abl_yr)[0][0]
+                ind_end[i] = np.argwhere(ind_abl_yr)[-1][0]
+
+            else:
+                logger.info(y, 'could not estimate ablation season')
+                # otherwise left as nan
+                ind_start[i] = -999
+                ind_end[i] = -999
+
+        # adjustment loop
+        missing_hs2 = 0 # if hs2 is missing then when it comes back it is adjusted to hs1
+        hs2_ref = 0 # by default, the PT is the reference: hs1 and 2 will be adjusted to PT
+        # but if it is missing one year or one winter, then it needs to be readjusted
+        # to hs1 and hs2 the year after.
+ + for i, y in enumerate(years): + logger.info(y) + # defining subsets of hs1, hs2, z + hs1_jja = hs1[str(y)+'-06-01':str(y)+'-09-01'] + hs2_jja = hs2[str(y)+'-06-01':str(y)+'-09-01'] + z_jja = z[str(y)+'-06-01':str(y)+'-09-01'] + + z_ablation = z.iloc[ind_start[i]:ind_end[i]] + hs2_ablation = hs2.iloc[ind_start[i]:ind_end[i]] + + hs1_year = hs1[str(y)] + hs2_year = hs2[str(y)] + + hs2_winter = hs2[str(y)+'-01-01':str(y)+'-03-01'].copy() + z_winter = z[str(y)+'-01-01':str(y)+'-03-01'].copy() + + hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy() + hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy() + + z_year = z[str(y)] + if hs1_jja.isnull().all() and hs2_jja.isnull().all() and z_jja.isnull().all(): + # if there is no height for a year between June and September + # then the adjustment cannot be made automatically + # it needs to be specified manually on the adjustment files + # on https://github.com/GEUS-Glaciology-and-Climate/PROMICE-AWS-data-issues + continue + + if all(np.isnan(z_jja)) and any(~np.isnan(hs2_jja)): + # if there is no PT for a given year, but there is some hs2 + # then z will be adjusted to hs2 next time it is available + hs2_ref = 1 + + if all(np.isnan(z_winter)) and all(np.isnan(hs2_winter)): + # if there is no PT nor hs2 during the winter, then again + # we need to adjust z to match hs2 when ablation starts + hs2_ref = 1 + + + if hs2_ref: + if ind_start[i] != -999: + # the first year there is both ablation and PT data available + # then PT is adjusted to hs2 + if any(~np.isnan(z_ablation)) and any(~np.isnan(hs2_ablation)): + logger.info('adjusting z to hs2') + tmp1 = z_ablation.copy() + tmp2 = hs2_ablation.copy() + tmp1[np.isnan(tmp2)] = np.nan + tmp2[np.isnan(tmp1)] = np.nan + + # in some instances, the PT data is available but no ablation + # is recorded, then hs2 remains the reference during that time. 
+                    # When eventually there is ablation, then we need to find the
+                    # first index in these preceding ablation-free years
+                    # the shift will be applied back from this point
+                    first_index = z[:z[str(y)].first_valid_index()].isnull().iloc[::-1].idxmax()
+                    z[first_index:] = z[first_index:] - np.nanmean(tmp1) + np.nanmean(tmp2)
+                    hs2_ref = 0 # from now on PT is the reference
+            else:
+                # if there is some ablation
+                if (ind_start[i] != -999) & z_year.notnull().any():
+                    # calculating first index with PT, hs1 and hs2
+                    first_index = z_year.first_valid_index()
+                    if hs1_year.notnull().any():
+                        first_index = np.max(np.array(
+                            [first_index,
+                             hs1_year.first_valid_index()]))
+                    if hs2_year.notnull().any():
+                        first_index = np.max(np.array(
+                            [first_index,
+                             hs2_year.first_valid_index()]))
+
+                    # if PT, hs1 and hs2 are all nan until the station is reactivated,
+                    # then hs1 and hs2 are shifted to match z_pt where the data resume
+                    first_day_of_year = pd.to_datetime(str(y)+'-01-01')
+
+                    if len(z[first_day_of_year:first_index-pd.to_timedelta('1D')])>0:
+                        if z[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all() & \
+                            hs1[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all() & \
+                            hs2[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all():
+                            if (~np.isnan(np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])) \
+                                and ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]))):
+                                logger.info(' ======= adjusting hs1 and hs2 to z_pt')
+                                if ~np.isnan(np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) ):
+                                    hs1[first_index:] = hs1[first_index:] \
+                                        - np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) \
+                                        + np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])
+                                if ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]) ):
+                                    hs2[first_index:] = hs2[first_index:] \
+                                        - np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]) \
+                                        + np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])
+
+            # adapting hs2
+            if (ind_end[i] != -999):
+                # and if there are PT data available at the end of the melt season
+                if np.any(~np.isnan(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)])):
+                    logger.info('adjusting hs2 to z')
+                    # then we adjust hs2 to the end-of-ablation z
+                    # first trying at the end of melt season
+                    if ~np.isnan(np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])):
+                        logger.info('using end of melt season')
+                        hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \
+                            np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) + \
+                            np.nanmean(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])
+                    # if not possible, then trying the end of the following accumulation season
+                    elif (i+1 < len(ind_start)):
+                        if ind_start[i+1]!=-999 and any(~np.isnan(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])):
+                            logger.info('using end of accumulation season')
+                            hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \
+                                np.nanmean(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \
+                                np.nanmean(z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])
+            else:
+                logger.info('no ablation')
+
+            if all(np.isnan(hs2_following_winter)):
+                logger.info('no hs2')
+                missing_hs2 = 1
+            elif missing_hs2 == 1:
+                logger.info('adjusting hs2')
+                # and if there are some hs1 during the accumulation period
+                if any(~np.isnan(hs1_following_winter)):
+                    logger.info('to hs1')
+                    # then we adjust hs2 to hs1 during the accumulation period
+                    # adjustment is done so that the mean hs1 and mean hs2 match
+                    # for the period when both are available
+                    hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan
+                    hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan
+
+                    hs2[str(y)+'-01-01':] = hs2[str(y)+'-01-01':] \
+                        - np.nanmean(hs2_following_winter) + np.nanmean(hs1_following_winter)
+                    missing_hs2 = 0
+
+            # adjusting hs1 to hs2 (no ablation case)
+            if any(~np.isnan(hs1_following_winter)):
+                logger.info('adjusting hs1')
+                # and if there are some hs2 during the accumulation period
+                if any(~np.isnan(hs2_following_winter)):
+                    logger.info('to hs2')
+                    # then we adjust hs1 to hs2 during the accumulation period
+                    # adjustment is done so that the mean hs1 and mean hs2 match
+                    # for the period when both are available
+                    hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan
+                    hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan
+
+                    hs1[str(y)+'-09-01':] = hs1[str(y)+'-09-01':] \
+                        - np.nanmean(hs1_following_winter) + np.nanmean(hs2_following_winter)
+
+            if ind_end[i] != -999:
+                # if there is some hs1
+                if any(~np.isnan(hs1_following_winter)):
+                    logger.info('adjusting hs1')
+                    # and if there are some hs2 during the accumulation period
+                    if any(~np.isnan(hs2_following_winter)):
+                        logger.info('to hs2')
+                        # then we adjust hs1 to hs2 during the accumulation period
+                        # adjustment is done so that the mean hs1 and mean hs2 match
+                        # for the period when both are available
+                        tmp1 = hs1.iloc[ind_end[i]:min(len(hs1),ind_end[i]+24*30*9)].copy()
+                        tmp2 = hs2.iloc[ind_end[i]:min(len(hs2),ind_end[i]+24*30*9)].copy()
+
+                        tmp1[np.isnan(tmp2)] = np.nan
+                        tmp2[np.isnan(tmp1)] = np.nan
+
+                        hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - np.nanmean(tmp1) + np.nanmean(tmp2)
+
+                    # if no hs2, then use PT data available at the end of the melt season
+                    elif np.any(~np.isnan(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)])):
+                        logger.info('to z')
+                        # then we adjust hs1 to the end-of-ablation z
+                        # first trying at the end of melt season
+                        if ~np.isnan(np.nanmean(hs1.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])):
+                            logger.info('using end of melt season')
+                            hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \
+                                np.nanmean(hs1.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) + \
+                                np.nanmean(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])
+                        # if not possible, then trying the end of the following accumulation season
+                        elif ind_start[i+1]!=-999 and any(~np.isnan(hs1.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])):
+                            logger.info('using end of accumulation season')
+                            hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \
+                                np.nanmean(hs1.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \
+                                np.nanmean(z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])
+
+        df["z_surf_1_adj"] = hs1.interpolate(limit=2*24).values
+        df["z_surf_2_adj"] = hs2.interpolate(limit=2*24).values
+        df["z_ice_surf_adj"] = z.interpolate(limit=2*24).values
+
+        # making a summary of the surface height
+        df["z_surf_combined"] = np.nan
+
+        # in winter, both SR1 and SR2 are used
+        df["z_surf_combined"] = np.nanmean( df[["z_surf_1_adj",
+                                                "z_surf_2_adj"]].values,
+                                           axis = 1)
+
+        # in ablation season we use SR2 instead of the SR1&2 average
+        # here two options:
+        # 1) we ignore the SR1 and only use SR2
+        # 2) we use SR1 when SR2 is not available (commented)
+        # the latter one can cause jumps when SR2 starts to be available a few days after SR1
+        data_update = df["z_surf_2_adj"].interpolate(limit=72).values
+        ind_update = ind_ablation
+        #ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update))
+        df.loc[ind_update,"z_surf_combined"] = data_update[ind_update]
+
+        # in ablation season we use the pressure transducer over all other options
+        data_update = df["z_ice_surf_adj"].interpolate(limit=72).values
+        ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update))
+        df.loc[ind_update,"z_surf_combined"] = data_update[ind_update]
+        logger.info('surface height combination finished')
+        return df['z_surf_combined'], df["z_ice_surf_adj"]
+
+def hampel(vals_orig, k=7*24, t0=3):
+    '''
+    vals_orig: pandas series of values from which to remove outliers
+    k: size of the rolling window, in number of samples (k=7 means the sample
+       itself plus 3 samples on either side)
+    t0: number of scaled median absolute deviations beyond which a value is
+        flagged as an outlier
+    '''
+    # Make copy so original not edited
+    vals=vals_orig.copy()
+    # Hampel Filter
+    L= 1.4826
+    rolling_median=vals.rolling(k).median()
+    difference=np.abs(rolling_median-vals)
+    median_abs_deviation=difference.rolling(k).median()
+    threshold= t0 *L * median_abs_deviation
+    outlier_idx=difference>threshold
+    outlier_idx[0:round(k/2)]=False
+    vals.loc[outlier_idx]=np.nan
+    return(vals)
+
+def thermistorDepth(df_in, site,
+                    installation_info_file = '../PROMICE-AWS-data-issues/fieldwork_summary_PROMICE_GC-Net.csv'):
+    '''Calculates the depth of the thermistors through time based on their
+    installation depth (collected in a google sheet) and on the change of surface
+    height: instruments getting buried under new snow or surfacing due to ablation.
+    There is a potential for additional filtering of thermistor data for surfaced
+    (or just noisy) thermistors, but that is currently deactivated because slow.
+
+    Parameters
+    ----------
+    df_in : pandas.DataFrame
+        dataframe containing the ice/firn temperature t_i_* as well as the
+        combined surface height z_surf_combined
+    site : str
+        stid, so that maintenance dates and sensor installation depths can be found
+        in the database
+    installation_info_file : str
+        path to file with thermistor installation dates and depths
+    '''
+
+    logger.info('Calculating thermistor depth')
+
+    maintenance_string = pd.read_csv(installation_info_file)
+
+    maintenance_string = maintenance_string.replace("OUT", np.nan)
+    maintenance_string[
+        "length_of_thermistor_string_on_surface_from_surface_marking"
+    ] = maintenance_string[
+        "length_of_thermistor_string_on_surface_from_surface_marking"
+    ].astype(
+        float
+    )
+    maintenance_string["date"] = pd.to_datetime(maintenance_string["date_visit"])
+    maintenance = maintenance_string.loc[maintenance_string.station == site]
+
+    temp_cols_name = ['t_i_'+str(i) for i in range(12) if 't_i_'+str(i) in df_in.columns]
+    num_therm = len(temp_cols_name)
+    depth_cols_name = ['d_t_i_'+str(i) for i in range(1,num_therm+1)]
+    if num_therm == 8:
+        ini_depth = [1, 2, 3, 4, 5, 6, 7, 10]
+    else:
+        ini_depth = [0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5]
+    df_in[depth_cols_name] = np.nan
+
+    # filtering the surface height
+    surface_height = df_in["z_surf_combined"].copy()
+    ind_filter = surface_height.rolling(window=14, center=True).var() > 0.1
+    if any(ind_filter):
+        surface_height[ind_filter] = np.nan
+    df_in["z_surf_combined"] = surface_height.values
+    z_surf_interp = df_in["z_surf_combined"].interpolate()
+
+    # first initialization of the depths
+    for i, col in enumerate(depth_cols_name):
+        df_in[col] = (
+            ini_depth[i]
+            + z_surf_interp.values
+            - z_surf_interp[z_surf_interp.first_valid_index()]
+        )
+
+    # resetting depth at maintenance
+    if len(maintenance.date) == 0:
+        logger.info("No maintenance at "+site)
+
+    for date in maintenance.date:
+        if date > z_surf_interp.last_valid_index():
+            continue
+        new_depth = maintenance.loc[
+            maintenance.date == date
+        ].depth_new_thermistor_m.values[0]
+        if isinstance(new_depth, str):
+            logger.info('thermistor reinstalled on '+date.strftime('%Y-%m-%d %X'))
+            new_depth = [float(x) for x in new_depth.split(",")]
+            for i, col in enumerate(depth_cols_name):
+                tmp = df_in[col].copy()
+                tmp.loc[date:] = (
+                    new_depth[i]
+                    + z_surf_interp[date:].values
+                    - z_surf_interp[date:][
+                        z_surf_interp[date:].first_valid_index()
+                    ]
+                )
+                df_in[col] = tmp.values
+
+    # Filtering thermistor data
+    # for i in range(len(temp_cols_name)):
+    #     tmp = df_in[temp_cols_name[i]].copy()
+
+    #     # variance filter
+    #     ind_filter = (
+    #         df_in[temp_cols_name[i]]
+    #         .interpolate(limit=14)
+    #         .rolling(window=7)
+    #         .var()
+    #         > 0.5
+    #     )
+    #     month = (
+    #         df_in[temp_cols_name[i]].interpolate(limit=14).index.month.values
+    #     )
+    #     ind_filter.loc[np.isin(month, [5, 6, 7])] = False
+    #     if any(ind_filter):
+    #         tmp.loc[ind_filter] = np.nan
+
+    #     # before and after maintenance adaptation filter
+    #     if len(maintenance.date) > 0:
+    #         for date in maintenance.date:
+    #             if isinstance(
+    #                 maintenance.loc[
+    #                     maintenance.date == date
+    #                 ].depth_new_thermistor_m.values[0],
+    #                 str,
+    #             ):
+    #                 ind_adapt = np.abs(
+    #                     tmp.interpolate(limit=14).index.values
+    #                     - pd.to_datetime(date).to_datetime64()
+    #                 ) < np.timedelta64(7, "D")
+    #                 if any(ind_adapt):
+    #                     tmp.loc[ind_adapt] = np.nan
+
+    #     # surfaced thermistor
+    #     ind_pos = df_in[depth_cols_name[i]] < 0.1
+    #     if any(ind_pos):
+    #         tmp.loc[ind_pos] = np.nan
+    #     # copying the filtered values to the original table
+    #     df_in[temp_cols_name[i]] = tmp.values
+
+    # interpolating 10 m firn/ice temp
+    df_in['t_i_10m'] = interpolate_temperature(
+        df_in.index.values,
+        df_in[depth_cols_name].values.astype(float),
+        df_in[temp_cols_name].values.astype(float),
+        kind="linear",
+        min_diff_to_depth=1.5,
+    ).set_index('date').values
+
+    # filtering
+    ind_pos = df_in["t_i_10m"] > 0.1
+    ind_low = df_in["t_i_10m"] < -70
+    df_in.loc[ind_pos, "t_i_10m"] = np.nan
+    df_in.loc[ind_low, "t_i_10m"] = np.nan
+    depth_cols_name.append('t_i_10m')
+    return df_in[depth_cols_name]
+
+
+def interpolate_temperature(dates, depth_cor, temp, depth=10, min_diff_to_depth=2,
+                            kind="quadratic"):
+    '''Interpolates, for each time step, the subsurface temperature profile
+    (depth_cor, temp) at a constant target depth (10 m by default). The
+    interpolation is only done when at least two valid measurements are
+    available and when one of them lies within min_diff_to_depth of the
+    target depth.
+
+    Parameters
+    ----------
+    dates : numpy.array
+        array of datetime64
+    depth_cor : numpy.ndarray
+        matrix of depths
+    temp : numpy.ndarray
+        matrix of temperatures
+    depth : float
+        constant depth at which (depth_cor, temp) should be interpolated.
+    min_diff_to_depth: float
+        maximum difference allowed between the available depth and the target depth
+        for the interpolation to be done.
+    kind : str
+        type of interpolation from scipy.interpolate.interp1d
+    '''
+
+    depth_cor = depth_cor.astype(float)
+    df_interp = pd.DataFrame()
+    df_interp["date"] = dates
+    df_interp["temperatureObserved"] = np.nan
+
+    # preprocessing temperatures for small gaps
+    tmp = pd.DataFrame(temp)
+    tmp["time"] = dates
+    tmp = tmp.set_index("time")
+    tmp = tmp.resample("H").mean()
+    # tmp = tmp.interpolate(limit=24*7)
+    temp = tmp.loc[dates].values
+    for i in (range(len(dates))):
+        x = depth_cor[i, :].astype(float)
+        y = temp[i, :].astype(float)
+        ind_no_nan = ~np.isnan(x + y)
+        x = x[ind_no_nan]
+        y = y[ind_no_nan]
+        x, indices = np.unique(x, return_index=True)
+        y = y[indices]
+        if len(x) < 2 or np.min(np.abs(x - depth)) > min_diff_to_depth:
+            continue
+        f = interp1d(x, y, kind, fill_value="extrapolate")
+        df_interp.iloc[i, 1] = np.minimum(f(depth), 0)  # cap at the melting point
+
+    if df_interp.iloc[:5, 1].std() > 0.1:
+        df_interp.iloc[:5, 1] = np.nan
+
+    return df_interp
+
 def gpsCoordinatePostprocessing(ds, var):
     # saving the static value of 'lat','lon' or 'alt' stored in attribute
     # as it might be the only coordinate available for certain stations (e.g. bedrock)

From 9685a1ed14edc6c72f948c2a83137eddf7f3e73c Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Thu, 27 Jun 2024 11:22:53 +0200
Subject: [PATCH 2/5] better doc for surfaceHeightProcessing

---
 src/pypromice/process/L2toL3.py | 91 +++++++++++++++++++++------------
 1 file changed, 57 insertions(+), 34 deletions(-)

diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py
index a1b91186..66515fd1 100755
--- a/src/pypromice/process/L2toL3.py
+++ b/src/pypromice/process/L2toL3.py
@@ -89,61 +89,84 @@ def toL3(L2, T_0=273.15):
 
     return ds
 
+
 def surfaceHeightProcessing(ds):
-    # creating surface height variables
-    ds['z_surf_1'] = ('time', ds['z_boom_u'].data*np.nan)
-    ds['z_surf_2'] = ('time', ds['z_boom_u'].data*np.nan)
+    """
+    Process surface height data for different site types and create
+    surface height variables.
 
-    if ds.attrs['site_type'] == 'ablation':
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        The dataset containing various measurements and attributes including
+        'site_type' which determines the type of site (e.g., 'ablation',
+        'accumulation', 'bedrock') and other relevant data variables such as
+        'z_boom_u', 'z_stake', 'z_pt_cor', etc.
+
+    Returns
+    -------
+    xarray.Dataset
+        The dataset with additional processed surface height variables:
+        'z_surf_1', 'z_surf_2', 'z_ice_surf', 'z_surf_combined', 'snow_height',
+        and possibly depth variables derived from temperature measurements.
+ """ + # Initialize surface height variables with NaNs + ds['z_surf_1'] = ('time', ds['z_boom_u'].data * np.nan) + ds['z_surf_2'] = ('time', ds['z_boom_u'].data * np.nan) + + if ds.attrs['site_type'] == 'ablation': + # Calculate surface heights for ablation sites ds['z_surf_1'] = 2.6 - ds['z_boom_u'] - first_valid_index = ds.time.where(ds.z_stake.notnull(),drop=True).data[0] + first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0] if first_valid_index: - ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] - + ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] + + # Use corrected point data if available if 'z_pt_cor' in ds.data_vars: ds['z_ice_surf'] = ('time', ds['z_pt_cor'].data) - + else: - first_valid_index = ds.time.where(ds.z_boom_u.notnull(),drop=True).data[0] + # Calculate surface heights for other site types + first_valid_index = ds.time.where(ds.z_boom_u.notnull(), drop=True).data[0] ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u'] if 'z_boom_l' in ds.data_vars: - first_valid_index = ds.time.where(ds.z_boom_l.notnull(),drop=True).data[0] + first_valid_index = ds.time.where(ds.z_boom_l.notnull(), drop=True).data[0] ds['z_surf_2'] = ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'] - if 'z_stake' in ds.data_vars: - if ds.z_stake.notnull().any(): - first_valid_index = ds.time.where(ds.z_stake.notnull(),drop=True).data[0] - ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] - - - ds = adjustData(ds, var_list=['z_surf_1', 'z_surf_2','z_ice_surf']) - - df_in = ds[[v for v in ['z_surf_1', 'z_surf_2','z_ice_surf'] if v in ds.data_vars]].to_dataframe() - - ds['z_surf_combined'], ds['z_ice_surf'] = combineSurfaceHeight(df_in, ds.attrs['site_type']) - - if ds.attrs['site_type'] == 'ablation': - ds['z_ice_surf'] = ds.z_surf_combined.to_series().rolling('365D').min() - ds['snow_height'] = ds['z_surf_combined'] -ds['z_ice_surf'] - elif ds.attrs['site_type'] == 'accumulation': - ds['z_ice_surf'] = ('time', ds['z_surf_1'].data*np.nan) - ds['snow_height'] = ds['z_surf_combined'] - elif ds.attrs['site_type'] == 'bedrock': - ds['z_ice_surf'] = ('time', ds['z_surf_1'].data*np.nan) + if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any(): + first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0] + ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] + + # Adjust data for the created surface height variables + ds = adjustData(ds, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf']) + + # Convert to dataframe and combine surface height variables + df_in = ds[[v for v in ['z_surf_1', 'z_surf_2', 'z_ice_surf'] if v in ds.data_vars]].to_dataframe() + ds['z_surf_combined'], ds['z_ice_surf'] = combineSurfaceHeight(df_in, ds.attrs['site_type']) + + if ds.attrs['site_type'] == 'ablation': + # Calculate rolling minimum for ice surface height and snow height + ds['z_ice_surf'] = ds.z_surf_combined.to_series().rolling('365D').min() + ds['snow_height'] = ds['z_surf_combined'] - ds['z_ice_surf'] + elif ds.attrs['site_type'] in ['accumulation', 'bedrock']: + # Handle accumulation and bedrock site types + ds['z_ice_surf'] = ('time', ds['z_surf_1'].data * np.nan) ds['snow_height'] = ds['z_surf_combined'] else: + # Log info for other site types logger.info('other site type') if ds.attrs['site_type'] != 'bedrock': + # Process ice temperature data and create depth variables ice_temp_vars 
= [v for v in ds.data_vars if 't_i_' in v] - vars_out = [v.replace('t','d_t') for v in ice_temp_vars] + vars_out = [v.replace('t', 'd_t') for v in ice_temp_vars] vars_out.append('t_i_10m') - df_out = thermistorDepth( - ds[ice_temp_vars+['z_surf_combined']].to_dataframe(), - ds.attrs['station_id']) + df_out = thermistorDepth(ds[ice_temp_vars + ['z_surf_combined']].to_dataframe(), ds.attrs['station_id']) for var in df_out.columns: ds[var] = ('time', df_out[var].values) + return ds + def combineSurfaceHeight(df, site_type, threshold_ablation = -0.0002): '''Combines the data from three sensor: the two sonic rangers and the pressure transducer, to recreate the surface height, the ice surface height From 057124873b62ef8527ef298fa1a69efc214baa7f Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Thu, 27 Jun 2024 11:43:21 +0200 Subject: [PATCH 3/5] added new variables to variable.csv --- src/pypromice/resources/variables.csv | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/pypromice/resources/variables.csv b/src/pypromice/resources/variables.csv index 77eaa7b8..5979d63d 100644 --- a/src/pypromice/resources/variables.csv +++ b/src/pypromice/resources/variables.csv @@ -88,4 +88,21 @@ rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corre wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4 wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4 wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 -wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 \ No newline at end of file +wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 +z_surf_1,height_of_surface_1,"Surface height 1, relative to surface height at installation and calculated from boom (upper one if two)",m,modelResult,time,FALSE,L3,,,,all,0,0,1,4 +z_surf_2,height_of_surface_2,"Surface height 2, relative to surface height at installation and calculated from stake booms and pt",m,modelResult,time,FALSE,L3,,,,all,0,0,1,4 +z_surf_combined,height_of_surface_combined,"Surface height combined from multiple sensors, relative to ice surface height at installation",m,modelResult,time,FALSE,L3,,,,all,0,0,1,4 +z_ice_surf,height_of_ice_surface,"Ice surface height, relative to ice surface height at installation and calculated from pt_cor and z_stake",m,modelResult,time,FALSE,L3,,,,one-boom,0,0,1,4 +snow_height,height_of_snow,"Snow surface height, relative to ice surface",m,modelResult,time,FALSE,L3,0,,,one-boom,0,0,1,4 +d_t_i_1,depth_of_thermistor_1,Depth of thermistor 1,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4 +d_t_i_2,depth_of_thermistor_2,Depth of thermistor 2,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4 +d_t_i_3,depth_of_thermistor_3,Depth of thermistor 3,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4 +d_t_i_4,depth_of_thermistor_4,Depth of thermistor 4,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4 +d_t_i_5,depth_of_thermistor_5,Depth of thermistor 5,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4 +d_t_i_6,depth_of_thermistor_6,Depth of thermistor 6,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4 
+d_t_i_7,depth_of_thermistor_7,Depth of thermistor 7,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
+d_t_i_8,depth_of_thermistor_8,Depth of thermistor 8,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
+d_t_i_9,depth_of_thermistor_9,Depth of thermistor 9,m,modelResult,time,FALSE,L3,-10,100,,two-boom,0,0,1,4
+d_t_i_10,depth_of_thermistor_10,Depth of thermistor 10,m,modelResult,time,FALSE,L3,-10,100,,two-boom,0,0,1,4
+d_t_i_11,depth_of_thermistor_11,Depth of thermistor 11,m,modelResult,time,FALSE,L3,-10,100,,two-boom,0,0,1,4
+t_i_10m,10m_subsurface_temperature,10 m subsurface temperature,degrees_C,modelResult,time,FALSE,L3,-70,0,,all,0,0,1,4

From c9f4ceb2f2af9571a7c2c2c16e756705b40b0f43 Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Thu, 27 Jun 2024 11:57:25 +0200
Subject: [PATCH 4/5] fixing tests

added site_type to dummy test dataset
---
 src/pypromice/process/L2toL3.py      | 33 +++++++++++++++++-----------
 src/pypromice/process/test.py        |  1 +
 src/pypromice/test/test_config1.toml |  1 +
 src/pypromice/test/test_config2.toml |  1 +
 4 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py
index 66515fd1..39ae0793 100755
--- a/src/pypromice/process/L2toL3.py
+++ b/src/pypromice/process/L2toL3.py
@@ -9,7 +9,7 @@ from sklearn.linear_model import LinearRegression
 from pypromice.qc.github_data_issues import adjustData
 from scipy.interpolate import interp1d
 
-import logging
+import logging, os
 logger = logging.getLogger(__name__)
 
 def toL3(L2, T_0=273.15):
@@ -205,8 +205,7 @@ def combineSurfaceHeight(df, site_type, threshold_ablation = -0.0002):
         df["z_surf_2_adj"] = df["z_surf_2_adj"] + (df["z_surf_1_adj"]- df["z_surf_2_adj"]).mean()
         # z_surf_combined is the average of the two z_surf
         if df.z_surf_1_adj.notnull().any() & df.z_surf_2_adj.notnull().any():
-            df['z_surf_combined'] = np.nanmean(
-                df[['z_surf_1_adj', 'z_surf_2_adj']].values, axis = 1)
+            df['z_surf_combined'] = df[['z_surf_1_adj', 'z_surf_2_adj']].mean(axis = 1).values
         elif df.z_surf_1_adj.notnull().any():
             df['z_surf_combined'] = df.z_surf_1_adj.values
         elif df.z_surf_2_adj.notnull().any():
@@ -307,17 +306,17 @@ def combineSurfaceHeight(df, site_type, threshold_ablation = -0.0002):
 
                 ind_abl_yr = np.logical_and(ind_yr, df.index.month.isin([6,7,8]))
                 ind_ablation[ind_yr] = ind_abl_yr[ind_yr]
-                logger.info(y, 'no z_ice_surf, just using JJA')
+                logger.info(str(y)+' no z_ice_surf, just using JJA')
 
             if np.any(ind_abl_yr):
-                logger.info(y, 'derived from z_ice_surf')
+                logger.info(str(y)+ ' derived from z_ice_surf')
                 # if there are some ablation flagged for that year
                 # then find beginning and end
                 ind_start[i] = np.argwhere(ind_abl_yr)[0][0]
                 ind_end[i] = np.argwhere(ind_abl_yr)[-1][0]
 
             else:
-                logger.info(y, 'could not estimate ablation season')
+                logger.info(str(y) + ' could not estimate ablation season')
                 # otherwise left as nan
                 ind_start[i] = -999
                 ind_end[i] = -999
@@ -329,7 +328,7 @@ def combineSurfaceHeight(df, site_type, threshold_ablation = -0.0002):
     # to hs1 and hs2 the year after.
 
         for i, y in enumerate(years):
-            logger.info(y)
+            logger.info(str(y))
            # defining subsets of hs1, hs2, z
            hs1_jja = hs1[str(y)+'-06-01':str(y)+'-09-01']
            hs2_jja = hs2[str(y)+'-06-01':str(y)+'-09-01']
@@ -517,9 +516,7 @@ def combineSurfaceHeight(df, site_type, threshold_ablation = -0.0002):
         df["z_surf_combined"] = np.nan
 
         # in winter, both SR1 and SR2 are used
-        df["z_surf_combined"] = np.nanmean( df[["z_surf_1_adj",
-                                                "z_surf_2_adj"]].values,
-                                           axis = 1)
+        df["z_surf_combined"] = df[["z_surf_1_adj", "z_surf_2_adj"]].mean(axis=1).values
 
         # in ablation season we use SR2 instead of the SR1&2 average
         # here two options:
@@ -557,7 +554,7 @@ def hampel(vals_orig, k=7*24, t0=3):
     return(vals)
 
 def thermistorDepth(df_in, site,
-                    installation_info_file = '../PROMICE-AWS-data-issues/fieldwork_summary_PROMICE_GC-Net.csv'):
+                    installation_info_file = '../aws-l0/metadata/fieldwork_summary_PROMICE_GC-Net.csv'):
     '''Calculates the depth of the thermistors through time based on their
     installation depth (collected in a google sheet) and on the change of surface
     height: instruments getting buried under new snow or surfacing due to ablation.
@@ -577,8 +574,18 @@ def thermistorDepth(df_in, site,
     '''
 
     logger.info('Calculating thermistor depth')
-
-    maintenance_string = pd.read_csv(installation_info_file)
+
+    if os.path.isfile(installation_info_file):
+        maintenance_string = pd.read_csv(installation_info_file)
+        logger.info('loading '+installation_info_file)
+    else:
+        maintenance_string = pd.DataFrame(columns=['date_visit', 'station',
+            'length_of_thermistor_string_on_surface_from_surface_marking',
+            'depth_new_thermistor_m', 'note', 'depth_ablation_hose_arrival_m',
+            'depth_ablation_hose_departure_m', 'height_sr50_aws_arrival_cm',
+            'height_sr50_aws_departure_cm', 'height_sr50_stakes_arrival_cm',
+            'height_sr50_stakes_departure_cm', 'note.1'])
+        logger.info('cannot find '+installation_info_file)
 
     maintenance_string = maintenance_string.replace("OUT", np.nan)
     maintenance_string[
diff --git a/src/pypromice/process/test.py b/src/pypromice/process/test.py
index f5e26601..5c2ce6bb 100644
--- a/src/pypromice/process/test.py
+++ b/src/pypromice/process/test.py
@@ -40,6 +40,7 @@ def testAddAll(self):
                        datetime.datetime.now()-timedelta(days=365)]
         d.attrs['station_id']='TEST'
         d.attrs['level']='L2_test'
+        d.attrs['site_type']='ablation'
         meta = getMeta()
         d = addVars(d, v)
         d = addMeta(d, meta)
diff --git a/src/pypromice/test/test_config1.toml b/src/pypromice/test/test_config1.toml
index 09392c08..6b9e2e95 100644
--- a/src/pypromice/test/test_config1.toml
+++ b/src/pypromice/test/test_config1.toml
@@ -2,6 +2,7 @@ station_id = 'TEST1'
 logger_type = 'CR1000X'
 nodata = ['-999', 'NAN'] # if one is a string, all must be strings
 number_of_booms = 1 #1-boom = promice, 2-boom = gc-net
+site_type = 'ablation'
 
 latitude = 79.91
 longitude = 24.09
diff --git a/src/pypromice/test/test_config2.toml b/src/pypromice/test/test_config2.toml
index 8213d955..195cd2ac 100644
--- a/src/pypromice/test/test_config2.toml
+++ b/src/pypromice/test/test_config2.toml
@@ -2,6 +2,7 @@ station_id = 'TEST2'
 logger_type = 'CR1000X'
 nodata = ['-999', 'NAN'] # if one is a string, all must be strings
 number_of_booms = 2
+site_type = 'accumulation'
 
 ['test_raw_transmitted2.txt']
 format = 'TX'

From 0b98589f3cec4fe9161b91bffe0e3e816de37a4e Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Thu, 27 Jun 2024 15:50:30 +0200
Subject: [PATCH 5/5] removing resampling within T10m calculation

---
 src/pypromice/process/L2toL3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 39ae0793..ee7b32a8 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -741,7 +741,7 @@ def interpolate_temperature(dates, depth_cor, temp, depth=10, min_diff_to_depth= tmp = pd.DataFrame(temp) tmp["time"] = dates tmp = tmp.set_index("time") - tmp = tmp.resample("H").mean() + # tmp = tmp.resample("H").mean() # tmp = tmp.interpolate(limit=24*7) temp = tmp.loc[dates].values for i in (range(len(dates))):
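
A short, self-contained sketch of how the Hampel filter introduced in patch 1 behaves may help when reviewing. The synthetic series, spike positions and window below are invented for illustration; the filter body mirrors hampel() from the patch:

    import numpy as np
    import pandas as pd

    def hampel(vals_orig, k=7*24, t0=3):
        # flag values further than t0 * 1.4826 * MAD from the rolling median
        vals = vals_orig.copy()
        rolling_median = vals.rolling(k).median()
        difference = np.abs(rolling_median - vals)
        mad = difference.rolling(k).median()
        outlier_idx = difference > t0 * 1.4826 * mad
        outlier_idx[0:round(k/2)] = False  # leave the spin-up of the window untouched
        vals.loc[outlier_idx] = np.nan
        return vals

    # hourly surface height rising 0.5 m over 60 days, with two spurious spikes
    t = pd.date_range('2020-01-01', periods=24*60, freq='h')
    z = pd.Series(np.linspace(0, 0.5, t.size), index=t)
    z.iloc[500] += 2.0
    z.iloc[900] -= 2.0
    print(hampel(z).isna().sum())  # should print 2: only the spikes are removed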
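
For a single time step, the 10 m temperature interpolation in interpolate_temperature() reduces to the following (depths and temperatures are invented; the two-point minimum and the min_diff_to_depth gate are the ones used in the patch):

    import numpy as np
    from scipy.interpolate import interp1d

    depths = np.array([2.0, 4.0, 6.0, 8.0, 9.5, 11.0])         # thermistor depths (m)
    temps = np.array([-12.1, -11.0, -10.2, -9.6, -9.3, -9.0])  # deg C

    ind = ~np.isnan(depths + temps)                      # keep pairs where both are valid
    x, idx = np.unique(depths[ind], return_index=True)   # unique, sorted depths
    y = temps[ind][idx]

    if len(x) >= 2 and np.min(np.abs(x - 10.0)) <= 1.5:
        f = interp1d(x, y, kind='linear', fill_value='extrapolate')
        t_10m = min(float(f(10.0)), 0.0)                 # cap at the melting point
        print(t_10m)                                     # -9.2 for the profile above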
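
Likewise, the ablation-period detection in combineSurfaceHeight() (double 14-day smoothing of the pressure-transducer record, then flagging hours where the smoothed series drops faster than threshold_ablation, June to September only) can be exercised on a synthetic record. This sketch assumes pandas >= 1.3 for centered time-based rolling windows; the melt-season dates and the 1.5 m of ablation are made up:

    import numpy as np
    import pandas as pd

    threshold_ablation = -0.0002  # m per hour, as in the patch

    t = pd.date_range('2021-01-01', '2021-12-31', freq='h')
    z_pt = pd.Series(0.0, index=t)                    # stable winter surface
    melt = (t >= '2021-06-10') & (t <= '2021-08-20')
    z_pt[melt] = np.linspace(0, -1.5, melt.sum())     # steady summer lowering
    z_pt[t > '2021-08-20'] = -1.5

    smoothed = z_pt.rolling('14D', center=True, min_periods=1).mean()
    smoothed = smoothed.rolling('14D', center=True, min_periods=1).mean()
    ind_ablation = np.logical_and(smoothed.diff().values < threshold_ablation,
                                  np.isin(smoothed.index.month, [6, 7, 8, 9]))
    print(t[ind_ablation].min(), t[ind_ablation].max())  # roughly the prescribed melt season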