From 16fdf84a302f4106f67aa0c2f2a821873c07d869 Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Mon, 1 Jul 2024 09:25:19 +0200
Subject: [PATCH] Revert "Merge branch 'feature/surface-heights-and-thermistor-depths' into feature/smoothing-and-extrapolating-gps-coordinates"

This reverts commit b78db43636e3e205d2b7956859d7f794e196bfcc, reversing
changes made to 285fd94bf2fe351fa3b2b30b471e9ed9435c510f.
---
 src/pypromice/process/L2toL3.py       | 684 +------------------------
 src/pypromice/process/test.py         |   1 -
 src/pypromice/resources/variables.csv |  19 +-
 src/pypromice/test/test_config1.toml  |   1 -
 src/pypromice/test/test_config2.toml  |   1 -
 5 files changed, 3 insertions(+), 703 deletions(-)

diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py
index f265d9b2..cd9e70fa 100755
--- a/src/pypromice/process/L2toL3.py
+++ b/src/pypromice/process/L2toL3.py
@@ -7,9 +7,8 @@ import xarray as xr
 import toml
 from sklearn.linear_model import LinearRegression
-from pypromice.qc.github_data_issues import adjustData
-from scipy.interpolate import interp1d
-import logging, os
+
+import logging
 logger = logging.getLogger(__name__)
 
 def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273.15):
@@ -17,8 +16,6 @@ def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273.15):
     derived variables:
         - Turbulent fluxes
         - smoothed and inter/extrapolated GPS coordinates
-        - continuous surface height, ice surface height, snow height
-        - thermistor depths
 
     Parameters
@@ -103,685 +100,8 @@ def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273.15):
     # adding average coordinate as attribute
     for v in ['lat','lon','alt']:
         ds.attrs[v+'_avg'] = ds[v].mean(dim='time').item()
-
-    # processing continuous surface height, ice surface height, snow height
-    ds = surfaceHeightProcessing(ds)
-
-    return ds
-
-
-def surfaceHeightProcessing(ds):
-    """
-    Process surface height data for different site types and create
-    surface height variables.
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        The dataset containing various measurements and attributes, including
-        'site_type', which determines the type of site (e.g. 'ablation',
-        'accumulation', 'bedrock'), and other relevant data variables such as
-        'z_boom_u', 'z_stake', 'z_pt_cor', etc.
-
-    Returns
-    -------
-    xarray.Dataset
-        The dataset with additional processed surface height variables:
-        'z_surf_1', 'z_surf_2', 'z_ice_surf', 'z_surf_combined', 'snow_height',
-        and possibly depth variables derived from temperature measurements.
- """ - # Initialize surface height variables with NaNs - ds['z_surf_1'] = ('time', ds['z_boom_u'].data * np.nan) - ds['z_surf_2'] = ('time', ds['z_boom_u'].data * np.nan) - - if ds.attrs['site_type'] == 'ablation': - # Calculate surface heights for ablation sites - ds['z_surf_1'] = 2.6 - ds['z_boom_u'] - first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0] - if first_valid_index: - ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] - - # Use corrected point data if available - if 'z_pt_cor' in ds.data_vars: - ds['z_ice_surf'] = ('time', ds['z_pt_cor'].data) - - else: - # Calculate surface heights for other site types - first_valid_index = ds.time.where(ds.z_boom_u.notnull(), drop=True).data[0] - ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u'] - if 'z_boom_l' in ds.data_vars: - first_valid_index = ds.time.where(ds.z_boom_l.notnull(), drop=True).data[0] - ds['z_surf_2'] = ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'] - if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any(): - first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0] - ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] - - # Adjust data for the created surface height variables - ds = adjustData(ds, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf']) - - # Convert to dataframe and combine surface height variables - df_in = ds[[v for v in ['z_surf_1', 'z_surf_2', 'z_ice_surf'] if v in ds.data_vars]].to_dataframe() - ds['z_surf_combined'], ds['z_ice_surf'] = combineSurfaceHeight(df_in, ds.attrs['site_type']) - - if ds.attrs['site_type'] == 'ablation': - # Calculate rolling minimum for ice surface height and snow height - ds['z_ice_surf'] = ds.z_surf_combined.to_series().rolling('365D').min() - ds['snow_height'] = ds['z_surf_combined'] - ds['z_ice_surf'] - elif ds.attrs['site_type'] in ['accumulation', 'bedrock']: - # Handle accumulation and bedrock site types - ds['z_ice_surf'] = ('time', ds['z_surf_1'].data * np.nan) - ds['snow_height'] = ds['z_surf_combined'] - else: - # Log info for other site types - logger.info('other site type') - - if ds.attrs['site_type'] != 'bedrock': - # Process ice temperature data and create depth variables - ice_temp_vars = [v for v in ds.data_vars if 't_i_' in v] - vars_out = [v.replace('t', 'd_t') for v in ice_temp_vars] - vars_out.append('t_i_10m') - df_out = thermistorDepth(ds[ice_temp_vars + ['z_surf_combined']].to_dataframe(), ds.attrs['station_id']) - for var in df_out.columns: - ds[var] = ('time', df_out[var].values) - return ds - -def combineSurfaceHeight(df, site_type, threshold_ablation = -0.0002): - '''Combines the data from three sensor: the two sonic rangers and the - pressure transducer, to recreate the surface height, the ice surface height - and the snow depth through the years. For the accumulation sites, it is - only the average of the two sonic rangers (after manual adjustments to - correct maintenance shifts). For the ablation sites, first an ablation - period is estimated each year (either the period when z_pt_cor decreases - or JJA if no better estimate) then different adjustmnents are conducted - to stitch the three time series together: z_ice_surface (adjusted from - z_pt_cor) or if unvailable, z_surf_2 (adjusted from z_stake) - are used in the ablation period while an average of z_surf_1 and z_surf_2 - are used otherwise, after they are being adjusted to z_ice_surf at the end - of the ablation season. 
-def combineSurfaceHeight(df, site_type, threshold_ablation = -0.0002):
-    '''Combines the data from three sensors (the two sonic rangers and the
-    pressure transducer) to recreate the surface height, the ice surface
-    height and the snow depth through the years. For the accumulation sites,
-    it is only the average of the two sonic rangers (after manual adjustments
-    to correct maintenance shifts). For the ablation sites, an ablation period
-    is first estimated for each year (either the period when z_pt_cor
-    decreases, or JJA if no better estimate is available), then different
-    adjustments are conducted to stitch the three time series together:
-    z_ice_surf (adjusted from z_pt_cor) or, if unavailable, z_surf_2 (adjusted
-    from z_stake) is used in the ablation period, while an average of z_surf_1
-    and z_surf_2 is used otherwise, after being adjusted to z_ice_surf at the
-    end of the ablation season.
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-        Dataframe with datetime index and variables z_surf_1, z_surf_2 and z_ice_surf
-    site_type : str
-        Either 'accumulation' or 'ablation'
-    threshold_ablation : float
-        Threshold against which the hourly decrease of the smoothed z_pt_cor
-        is compared. A decrease steeper (more negative) than this threshold is
-        flagged as ablation.
-    '''
-    logger.info('Combining surface height')
-    if 'z_surf_2' not in df.columns:
-        logger.info('-> did not find z_surf_2')
-        df["z_surf_combined"] = hampel(df["z_surf_1"].interpolate(limit=72)).values
-        return df["z_surf_combined"], df["z_surf_combined"]*np.nan
-
-    elif site_type == 'accumulation':
-        logger.info('-> no z_pt or accumulation site: averaging z_surf_1 and z_surf_2')
-        df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72)).values
-        df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72)).values
-        # adjusting z_surf_2 to z_surf_1
-        df["z_surf_2_adj"] = df["z_surf_2_adj"] + (df["z_surf_1_adj"] - df["z_surf_2_adj"]).mean()
-        # z_surf_combined is the average of the two z_surf
-        if df.z_surf_1_adj.notnull().any() & df.z_surf_2_adj.notnull().any():
-            df['z_surf_combined'] = df[['z_surf_1_adj', 'z_surf_2_adj']].mean(axis=1).values
-        elif df.z_surf_1_adj.notnull().any():
-            df['z_surf_combined'] = df.z_surf_1_adj.values
-        elif df.z_surf_2_adj.notnull().any():
-            df['z_surf_combined'] = df.z_surf_2_adj.values
-
-        df["z_surf_combined"] = hampel(df["z_surf_combined"].interpolate(limit=72)).values
-        return df["z_surf_combined"], df["z_surf_combined"]*np.nan
-
-    else:
-        logger.info('-> ablation site')
-        # smoothing and filtering pressure transducer data
-        df["z_ice_surf_adj"] = hampel(df["z_ice_surf"].interpolate(limit=72)).values
-        df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72)).values
-        df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72)).values
-
-        # defining the ice ablation period from the decrease of a smoothed
-        # version of z_pt, i.e. when smoothed_z_pt.diff() < threshold_ablation
-        # first smoothing
-        smoothed_PT = (df['z_ice_surf']
-                       .resample('H')
-                       .interpolate(limit=72)
-                       .rolling('14D', center=True, min_periods=1)
-                       .mean())
-        # second smoothing
-        smoothed_PT = smoothed_PT.rolling('14D', center=True, min_periods=1).mean()
-
-        smoothed_PT = smoothed_PT.reindex(df.index, method='ffill')
-        smoothed_PT.loc[df.z_ice_surf.isnull()] = np.nan
-
-        # logical index where ablation is detected
-        ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation,
-                                      np.isin(smoothed_PT.diff().index.month, [6, 7, 8, 9]))
-
-        # finding the beginning and end of each period with True
-        idx = np.argwhere(np.diff(np.r_[False, ind_ablation, False])).reshape(-1, 2)
-        idx[:, 1] -= 1
-        # fill small gaps in the ice ablation periods
-        for i in range(len(idx)-1):
-            ind = idx[i]
-            ind_next = idx[i+1]
-            # if the end of an ablation period is less than 15 days away from
-            # the next ablation period, then it is still considered part of
-            # the same ablation season
-            if df.index[ind_next[0]] - df.index[ind[1]] < pd.to_timedelta('15D'):
-                ind_ablation[ind[1]:ind_next[0]] = True
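The detection above relies on np.argwhere(np.diff(...)) to turn a boolean "ablation detected" mask into (start, end) sample pairs, which the gap-filling loop then merges. The trick is worth a standalone sketch (synthetic mask, not station data):

    import numpy as np

    ind_ablation = np.array([False, True, True, False, False, True, True, True, False])

    # pad with False so every run of True has a rising and a falling edge;
    # np.diff marks the edges and argwhere pairs them up as (start, end+1)
    idx = np.argwhere(np.diff(np.r_[False, ind_ablation, False])).reshape(-1, 2)
    idx[:, 1] -= 1  # make the end index inclusive
    # idx -> [[1, 2], [5, 7]]: True runs at samples 1-2 and 5-7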
-        hs1 = df["z_surf_1_adj"].interpolate(limit=24*2).copy()
-        hs2 = df["z_surf_2_adj"].interpolate(limit=24*2).copy()
-        z = df["z_ice_surf_adj"].interpolate(limit=24*2).copy()
-
-        if z.notnull().any():
-            if z.first_valid_index() - hs1.first_valid_index() < pd.to_timedelta('251D'):
-                # if the pressure transducer is installed the year after, then
-                # we use the mean surface height 1 on its first week as a 0
-                # for the ice height
-                z = z - z.loc[
-                    z.first_valid_index():(z.first_valid_index()+pd.to_timedelta('14D'))
-                    ].mean() + hs1.iloc[:24*7].mean()
-            else:
-                # if there is more than a year (actually 251 days) between the
-                # initiation of the AWS and the installation of the pressure
-                # transducer, we remove the intercept in the pressure
-                # transducer data. Removing the intercept means that we
-                # consider the ice surface height to be 0 when the AWS is
-                # installed, and not when the pressure transducer is installed.
-                Y = z.values.reshape(-1, 1)
-                X = z.index.astype(np.int64).values.reshape(-1, 1)
-                X = X[~np.isnan(Y)].reshape(-1, 1)
-                Y = Y[~np.isnan(Y)].reshape(-1, 1)
-                linear_regressor = LinearRegression()
-                linear_regressor.fit(X, Y)
-                Y_pred = linear_regressor.predict(z.index.astype(np.int64).values.reshape(-1, 1))
-                z = z - Y_pred[0]
-
-        years = df.index.year.unique().values
-        ind_start = years.copy()
-        ind_end = years.copy()
-        logger.info('-> estimating ablation period for each year')
-        for i, y in enumerate(years):
-            # for each year
-            ind_yr = df.index.year.values == y
-            ind_abl_yr = np.logical_and(ind_yr, ind_ablation)
-
-            if df.loc[
-                    np.logical_and(ind_yr, df.index.month.isin([6, 7, 8])),
-                    "z_ice_surf_adj"].isnull().all():
-                ind_abl_yr = np.logical_and(ind_yr, df.index.month.isin([6, 7, 8]))
-                ind_ablation[ind_yr] = ind_abl_yr[ind_yr]
-                logger.info(str(y)+' no z_ice_surf, just using JJA')
-
-            if np.any(ind_abl_yr):
-                logger.info(str(y)+' derived from z_ice_surf')
-                # if there is ablation flagged for that year,
-                # then find its beginning and end
-                ind_start[i] = np.argwhere(ind_abl_yr)[0][0]
-                ind_end[i] = np.argwhere(ind_abl_yr)[-1][0]
-            else:
-                logger.info(str(y)+' could not estimate ablation season')
-                # otherwise left as -999
-                ind_start[i] = -999
-                ind_end[i] = -999
-
-        # adjustment loop
-        missing_hs2 = 0  # if hs2 is missing, then when it comes back it is adjusted to hs1
-        hs2_ref = 0  # by default, the PT is the reference: hs1 and hs2 will be adjusted to PT,
-        # but if the PT is missing for a year or a winter, then it needs to be
-        # readjusted to hs1 and hs2 the year after.
-
-        for i, y in enumerate(years):
-            logger.info(str(y))
-            # defining subsets of hs1, hs2, z
-            hs1_jja = hs1[str(y)+'-06-01':str(y)+'-09-01']
-            hs2_jja = hs2[str(y)+'-06-01':str(y)+'-09-01']
-            z_jja = z[str(y)+'-06-01':str(y)+'-09-01']
-
-            z_ablation = z.iloc[ind_start[i]:ind_end[i]]
-            hs2_ablation = hs2.iloc[ind_start[i]:ind_end[i]]
-
-            hs1_year = hs1[str(y)]
-            hs2_year = hs2[str(y)]
-
-            hs2_winter = hs2[str(y)+'-01-01':str(y)+'-03-01'].copy()
-            z_winter = z[str(y)+'-01-01':str(y)+'-03-01'].copy()
-
-            hs1_following_winter = hs1[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
-            hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy()
-
-            z_year = z[str(y)]
-            if hs1_jja.isnull().all() and hs2_jja.isnull().all() and z_jja.isnull().all():
-                # if there is no height observation for a year between June and
-                # September, then the adjustment cannot be made automatically;
-                # it needs to be specified manually in the adjustment files
-                # at https://github.com/GEUS-Glaciology-and-Climate/PROMICE-AWS-data-issues
-                continue
-
-            if all(np.isnan(z_jja)) and any(~np.isnan(hs2_jja)):
-                # if there is no PT for a given year, but there is some hs2,
-                # then z will be adjusted to hs2 next time it is available
-                hs2_ref = 1
-
-            if all(np.isnan(z_winter)) and all(np.isnan(hs2_winter)):
-                # if there is no PT nor hs2 during the winter, then again
-                # we need to adjust z to match hs2 when ablation starts
-                hs2_ref = 1
-
-            if hs2_ref:
-                if ind_start[i] != -999:
-                    # the first year when both ablation and PT data are
-                    # available, PT is adjusted to hs2
-                    if any(~np.isnan(z_ablation)) and any(~np.isnan(hs2_ablation)):
-                        logger.info('adjusting z to hs2')
-                        tmp1 = z_ablation.copy()
-                        tmp2 = hs2_ablation.copy()
-                        tmp1[np.isnan(tmp2)] = np.nan
-                        tmp2[np.isnan(tmp1)] = np.nan
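The tmp1/tmp2 masking above is the stitching primitive used throughout this loop: restrict both series to the samples where they are both valid, then shift one so the overlap means agree. A sketch of that idea as a helper (match_offset is hypothetical, not a pypromice function):

    import numpy as np
    import pandas as pd

    def match_offset(ref: pd.Series, other: pd.Series) -> pd.Series:
        """Shift `other` so its mean matches `ref` over their common valid samples."""
        tmp1, tmp2 = ref.copy(), other.copy()
        tmp1[np.isnan(tmp2)] = np.nan  # keep only samples where both series are valid
        tmp2[np.isnan(tmp1)] = np.nan
        return other - np.nanmean(tmp2) + np.nanmean(tmp1)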
-                        # in some instances, the PT data is available but no
-                        # ablation is recorded; hs2 then remains the reference
-                        # during that time. When eventually there is ablation,
-                        # we need to find the first index in these preceding
-                        # ablation-free years: the shift will be applied back
-                        # from this point
-                        first_index = z[:z[str(y)].first_valid_index()].isnull().iloc[::-1].idxmax()
-                        z[first_index:] = z[first_index:] - np.nanmean(tmp1) + np.nanmean(tmp2)
-                        hs2_ref = 0  # from now on PT is the reference
-
-            else:
-                # if there is some ablation
-                if (ind_start[i] != -999) & z_year.notnull().any():
-                    # calculating the first index with PT, hs1 and hs2
-                    first_index = z_year.first_valid_index()
-                    if hs1_year.notnull().any():
-                        first_index = np.max(np.array(
-                            [first_index,
-                             hs1_year.first_valid_index()]))
-                    if hs2_year.notnull().any():
-                        first_index = np.max(np.array(
-                            [first_index,
-                             hs2_year.first_valid_index()]))
-
-                    # if PT, hs1 and hs2 are all nan until the station is reactivated, then
-                    first_day_of_year = pd.to_datetime(str(y)+'-01-01')
-
-                    if len(z[first_day_of_year:first_index-pd.to_timedelta('1D')]) > 0:
-                        if z[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all() & \
-                            hs1[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all() & \
-                            hs2[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all():
-                            if (~np.isnan(np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')]))
-                                    and ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]))):
-                                logger.info(' ======= adjusting hs1 and hs2 to z_pt')
-                                if ~np.isnan(np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')])):
-                                    hs1[first_index:] = hs1[first_index:] \
-                                        - np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) \
-                                        + np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])
-                                if ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')])):
-                                    hs2[first_index:] = hs2[first_index:] \
-                                        - np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]) \
-                                        + np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])
-
-                # adapting hs2
-                if ind_end[i] != -999:
-                    # if there are PT data available at the end of the melt season
-                    if np.any(~np.isnan(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)])):
-                        logger.info('adjusting hs2 to z')
-                        # then we adjust hs2 to the end-of-ablation z,
-                        # first trying at the end of the melt season
-                        if ~np.isnan(np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])):
-                            logger.info('using end of melt season')
-                            hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \
-                                np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) + \
-                                np.nanmean(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])
-                        # if not possible, then trying the end of the following accumulation season
-                        elif (i+1 < len(ind_start)):
-                            if ind_start[i+1] != -999 and any(~np.isnan(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)] + z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])):
-                                logger.info('using end of accumulation season')
-                                hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \
-                                    np.nanmean(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \
-                                    np.nanmean(z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])
-                else:
-                    logger.info('no ablation')
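One detail worth isolating from the branch above: the "first index with PT, hs1 and hs2" is just the latest of the individual first_valid_index values, compared pairwise with np.max exactly as in the source. A sketch with synthetic series:

    import numpy as np
    import pandas as pd

    t = pd.date_range('2021-01-01', periods=10, freq='D')
    z   = pd.Series([np.nan]*3 + [1.0]*7, index=t)  # PT reports from Jan 4
    hs1 = pd.Series([np.nan]*1 + [2.0]*9, index=t)  # SR1 reports from Jan 2
    hs2 = pd.Series([np.nan]*5 + [3.0]*5, index=t)  # SR2 reports from Jan 6

    first_index = z.first_valid_index()
    for s in (hs1, hs2):
        if s.notnull().any():
            first_index = np.max(np.array([first_index, s.first_valid_index()]))
    # first_index -> Timestamp('2021-01-06'), the first day all three report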
-            if all(np.isnan(hs2_following_winter)):
-                logger.info('no hs2')
-                missing_hs2 = 1
-            elif missing_hs2 == 1:
-                logger.info('adjusting hs2')
-                # if there is some hs1 during the accumulation period,
-                if any(~np.isnan(hs1_following_winter)):
-                    logger.info('to hs1')
-                    # then we adjust hs2 to hs1 over the following winter;
-                    # the adjustment is done so that the mean hs1 and mean hs2
-                    # match for the period when both are available
-                    hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan
-                    hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan
-
-                    hs2[str(y)+'-01-01':] = hs2[str(y)+'-01-01':] \
-                        - np.nanmean(hs2_following_winter) + np.nanmean(hs1_following_winter)
-                    missing_hs2 = 0
-
-            # adjusting hs1 to hs2 (no ablation case)
-            if any(~np.isnan(hs1_following_winter)):
-                logger.info('adjusting hs1')
-                # if there is some hs2 during the accumulation period,
-                if any(~np.isnan(hs2_following_winter)):
-                    logger.info('to hs2')
-                    # then we adjust hs1 to hs2 over the following winter;
-                    # the adjustment is done so that the mean hs1 and mean hs2
-                    # match for the period when both are available
-                    hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan
-                    hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan
-
-                    hs1[str(y)+'-09-01':] = hs1[str(y)+'-09-01':] \
-                        - np.nanmean(hs1_following_winter) + np.nanmean(hs2_following_winter)
-
-            if ind_end[i] != -999:
-                # if there is some hs1
-                if any(~np.isnan(hs1_following_winter)):
-                    logger.info('adjusting hs1')
-                    # and if there is some hs2 during the accumulation period,
-                    if any(~np.isnan(hs2_following_winter)):
-                        logger.info('to hs2')
-                        # then we adjust hs1 to hs2 over the accumulation
-                        # period; the adjustment is done so that the mean hs1
-                        # and mean hs2 match for the period when both are available
-                        tmp1 = hs1.iloc[ind_end[i]:min(len(hs1), ind_end[i]+24*30*9)].copy()
-                        tmp2 = hs2.iloc[ind_end[i]:min(len(hs2), ind_end[i]+24*30*9)].copy()
-
-                        tmp1[np.isnan(tmp2)] = np.nan
-                        tmp2[np.isnan(tmp1)] = np.nan
-
-                        hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - np.nanmean(tmp1) + np.nanmean(tmp2)
-
-                    # if no hs2, then use the PT data available at the end of the melt season
-                    elif np.any(~np.isnan(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)])):
-                        logger.info('to z')
-                        # then we adjust hs1 to the end-of-ablation z,
-                        # first trying at the end of the melt season
-                        if ~np.isnan(np.nanmean(hs1.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])):
-                            logger.info('using end of melt season')
-                            hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \
-                                np.nanmean(hs1.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) + \
-                                np.nanmean(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])
-                        # if not possible, then trying the end of the following accumulation season
-                        elif (i+1 < len(ind_start)) and ind_start[i+1] != -999 and any(~np.isnan(hs1.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)] + z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])):
-                            logger.info('using end of accumulation season')
-                            hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \
-                                np.nanmean(hs1.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \
-                                np.nanmean(z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])
-
-        df["z_surf_1_adj"] = hs1.interpolate(limit=2*24).values
-        df["z_surf_2_adj"] = hs2.interpolate(limit=2*24).values
-        df["z_ice_surf_adj"] = z.interpolate(limit=2*24).values
-
-        # making a summary of the surface height
-        df["z_surf_combined"] = np.nan
-
-        # in winter, both SR1 and SR2 are used
-        df["z_surf_combined"] = df[["z_surf_1_adj", "z_surf_2_adj"]].mean(axis=1).values
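To make the precedence that follows explicit: the combined record starts from the SR1/SR2 average everywhere, is overwritten by SR2 inside detected ablation periods, and finally by the pressure transducer wherever it has data. A compact sketch of that priority stack (function and names hypothetical):

    import numpy as np
    import pandas as pd

    def combine(sr_mean: pd.Series, sr2: pd.Series, pt: pd.Series,
                ind_ablation: np.ndarray) -> pd.Series:
        out = sr_mean.copy()
        out[ind_ablation] = sr2[ind_ablation]        # SR2 wins during ablation
        use_pt = ind_ablation & pt.notnull().values  # PT wins wherever available
        out[use_pt] = pt[use_pt]
        return out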
-        # in the ablation season we use SR2 instead of the SR1 & SR2 average;
-        # here there are two options:
-        # 1) we ignore SR1 and only use SR2
-        # 2) we use SR1 when SR2 is not available (commented out below)
-        # the latter can cause jumps when SR2 starts to be available a few days after SR1
-        data_update = df["z_surf_2_adj"].interpolate(limit=72).values
-        ind_update = ind_ablation
-        # ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update))
-        df.loc[ind_update, "z_surf_combined"] = data_update[ind_update]
-
-        # in the ablation season we use the pressure transducer over all other options
-        data_update = df["z_ice_surf_adj"].interpolate(limit=72).values
-        ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update))
-        df.loc[ind_update, "z_surf_combined"] = data_update[ind_update]
-    logger.info('surface height combination finished')
-    return df['z_surf_combined'], df["z_ice_surf_adj"]
-
-def hampel(vals_orig, k=7*24, t0=3):
-    '''Hampel outlier filter.
-
-    vals_orig : pandas series of values from which to remove outliers
-    k : size of the window, including the sample (k=7 is equal to 3 on
-        either side of the value)
-    t0 : outlier threshold, expressed in number of scaled median absolute
-        deviations
-    '''
-    # Make a copy so the original is not edited
-    vals = vals_orig.copy()
-    # Hampel filter
-    L = 1.4826  # scale factor making the MAD consistent with a standard deviation
-    rolling_median = vals.rolling(k).median()
-    difference = np.abs(rolling_median - vals)
-    median_abs_deviation = difference.rolling(k).median()
-    threshold = t0 * L * median_abs_deviation
-    outlier_idx = difference > threshold
-    outlier_idx[0:round(k/2)] = False
-    vals.loc[outlier_idx] = np.nan
-    return vals
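A quick usage sketch for the Hampel filter above: samples that deviate from the trailing rolling median by more than t0 scaled MADs are replaced with NaN, so an isolated spike is masked while the trend survives (synthetic data; assumes the hampel defined above is in scope):

    import numpy as np
    import pandas as pd

    np.random.seed(0)
    t = pd.date_range('2021-01-01', periods=500, freq='h')
    s = pd.Series(np.linspace(0, 1, 500) + 0.01*np.random.randn(500), index=t)
    s.iloc[400] += 5  # isolated spike

    clean = hampel(s, k=7*24, t0=3)
    assert np.isnan(clean.iloc[400])  # spike masked; surrounding trend kept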
-def thermistorDepth(df_in, site,
-                    installation_info_file='../aws-l0/metadata/fieldwork_summary_PROMICE_GC-Net.csv'):
-    '''Calculates the depth of the thermistors through time based on their
-    installation depth (collected in a google sheet) and on the change of
-    surface height: instruments getting buried under new snow or surfacing
-    due to ablation. There is potential for additional filtering of
-    thermistor data for surfaced (or just noisy) thermistors, but that is
-    currently deactivated because it is slow.
-
-    Parameters
-    ----------
-    df_in : pandas.DataFrame
-        Dataframe containing the ice/firn temperatures t_i_* as well as the
-        combined surface height z_surf_combined
-    site : str
-        Station id (stid), so that maintenance dates and sensor installation
-        depths can be found in the database
-    installation_info_file : str
-        Path to the file with thermistor installation dates and depths
-    '''
-    logger.info('Calculating thermistor depth')
-
-    if os.path.isfile(installation_info_file):
-        maintenance_string = pd.read_csv(installation_info_file)
-        logger.info('loading '+installation_info_file)
-    else:
-        maintenance_string = pd.DataFrame(columns=['date_visit', 'station',
-            'length_of_thermistor_string_on_surface_from_surface_marking',
-            'depth_new_thermistor_m', 'note', 'depth_ablation_hose_arrival_m',
-            'depth_ablation_hose_departure_m', 'height_sr50_aws_arrival_cm',
-            'height_sr50_aws_departure_cm', 'height_sr50_stakes_arrival_cm',
-            'height_sr50_stakes_departure_cm', 'note.1'])
-        logger.info('cannot find '+installation_info_file)
-
-    maintenance_string = maintenance_string.replace("OUT", np.nan)
-    maintenance_string[
-        "length_of_thermistor_string_on_surface_from_surface_marking"
-    ] = maintenance_string[
-        "length_of_thermistor_string_on_surface_from_surface_marking"
-    ].astype(float)
-    maintenance_string["date"] = pd.to_datetime(maintenance_string["date_visit"])
-    maintenance = maintenance_string.loc[maintenance_string.station == site]
-
-    temp_cols_name = ['t_i_'+str(i) for i in range(12) if 't_i_'+str(i) in df_in.columns]
-    num_therm = len(temp_cols_name)
-    depth_cols_name = ['d_t_i_'+str(i) for i in range(1, num_therm+1)]
-    if num_therm == 8:
-        ini_depth = [1, 2, 3, 4, 5, 6, 7, 10]
-    else:
-        ini_depth = [0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5]
-    df_in[depth_cols_name] = np.nan
-
-    # filtering the surface height
-    surface_height = df_in["z_surf_combined"].copy()
-    ind_filter = surface_height.rolling(window=14, center=True).var() > 0.1
-    if any(ind_filter):
-        surface_height[ind_filter] = np.nan
-    df_in["z_surf_combined"] = surface_height.values
-    z_surf_interp = df_in["z_surf_combined"].interpolate()
-
-    # first initialization of the depths
-    for i, col in enumerate(depth_cols_name):
-        df_in[col] = (
-            ini_depth[i]
-            + z_surf_interp.values
-            - z_surf_interp[z_surf_interp.first_valid_index()]
-        )
-
-    # resetting depth at maintenance
-    if len(maintenance.date) == 0:
-        logger.info("No maintenance at "+site)
-
-    for date in maintenance.date:
-        if date > z_surf_interp.last_valid_index():
-            continue
-        new_depth = maintenance.loc[
-            maintenance.date == date
-        ].depth_new_thermistor_m.values[0]
-        if isinstance(new_depth, str):
-            logger.info('thermistor reinstalled on '+date.strftime('%Y-%m-%d %X'))
-            new_depth = [float(x) for x in new_depth.split(",")]
-            for i, col in enumerate(depth_cols_name):
-                tmp = df_in[col].copy()
-                tmp.loc[date:] = (
-                    new_depth[i]
-                    + z_surf_interp[date:].values
-                    - z_surf_interp[date:][
-                        z_surf_interp[date:].first_valid_index()
-                    ]
-                )
-                df_in[col] = tmp.values
-
-    # Filtering thermistor data (currently deactivated because it is slow)
-    # for i in range(len(temp_cols_name)):
-    #     tmp = df_in[temp_cols_name[i]].copy()
-
-    #     # variance filter
-    #     ind_filter = (
-    #         df_in[temp_cols_name[i]]
-    #         .interpolate(limit=14)
-    #         .rolling(window=7)
-    #         .var()
-    #         > 0.5
-    #     )
-    #     month = (
-    #         df_in[temp_cols_name[i]].interpolate(limit=14).index.month.values
-    #     )
-    #     ind_filter.loc[np.isin(month, [5, 6, 7])] = False
-    #     if any(ind_filter):
-    #         tmp.loc[ind_filter] = np.nan
-
-    #     # before and after maintenance adaptation filter
-    #     if len(maintenance.date) > 0:
-    #         for date in maintenance.date:
-    #             if isinstance(
-    #                 maintenance.loc[
-    #                     maintenance.date == date
-    #                 ].depth_new_thermistor_m.values[0],
-    #                 str,
-    #             ):
-    #                 ind_adapt = np.abs(
-    #                     tmp.interpolate(limit=14).index.values
-    #                     - pd.to_datetime(date).to_datetime64()
-    #                 ) < np.timedelta64(7, "D")
-    #                 if any(ind_adapt):
-    #                     tmp.loc[ind_adapt] = np.nan
-
-    #     # surfaced thermistor
-    #     ind_pos = df_in[depth_cols_name[i]] < 0.1
-    #     if any(ind_pos):
-    #         tmp.loc[ind_pos] = np.nan
-    #     # copying the filtered values to the original table
-    #     df_in[temp_cols_name[i]] = tmp.values
-
-    # interpolating 10 m firn/ice temp
-    df_in['t_i_10m'] = interpolate_temperature(
-        df_in.index.values,
-        df_in[depth_cols_name].values.astype(float),
-        df_in[temp_cols_name].values.astype(float),
-        kind="linear",
-        min_diff_to_depth=1.5,
-    ).set_index('date').values
-
-    # filtering out unphysical 10 m temperatures
-    ind_pos = df_in["t_i_10m"] > 0.1
-    ind_low = df_in["t_i_10m"] < -70
-    df_in.loc[ind_pos, "t_i_10m"] = np.nan
-    df_in.loc[ind_low, "t_i_10m"] = np.nan
-    depth_cols_name.append('t_i_10m')
-    return df_in[depth_cols_name]
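The depth bookkeeping above boils down to one line per sensor: depth(t) = installation depth + (surface height at t minus surface height at installation), re-zeroed whenever a maintenance visit reports new installation depths. The core update, sketched with synthetic numbers:

    import numpy as np
    import pandas as pd

    t = pd.date_range('2021-01-01', periods=100, freq='D')
    # surface rises 1 mm per day (accumulation), so sensors get buried deeper
    z_surf = pd.Series(np.arange(100) * 0.001, index=t)

    ini_depth = 1.0  # sensor installed 1 m below the surface
    z0 = z_surf[z_surf.first_valid_index()]
    d_t_i = ini_depth + z_surf - z0  # burial depth grows as the surface rises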
-
-def interpolate_temperature(dates, depth_cor, temp, depth=10, min_diff_to_depth=2,
-                            kind="quadratic"):
-    '''Interpolates the temperatures measured by a string of thermistors,
-    whose depths change over time, to a constant target depth. At each time
-    step, the available (depth, temperature) pairs are interpolated
-    vertically and evaluated at the target depth.
-
-    Parameters
-    ----------
-    dates : numpy.ndarray
-        Array of datetime64
-    depth_cor : numpy.ndarray
-        Matrix of depths
-    temp : numpy.ndarray
-        Matrix of temperatures
-    depth : float
-        Constant depth at which (depth_cor, temp) should be interpolated
-    min_diff_to_depth : float
-        Maximum difference allowed between the closest available depth and
-        the target depth for the interpolation to be done
-    kind : str
-        Type of interpolation, passed to scipy.interpolate.interp1d
-    '''
-    depth_cor = depth_cor.astype(float)
-    df_interp = pd.DataFrame()
-    df_interp["date"] = dates
-    df_interp["temperatureObserved"] = np.nan
-
-    # preprocessing temperatures for small gaps
-    tmp = pd.DataFrame(temp)
-    tmp["time"] = dates
-    tmp = tmp.set_index("time")
-    # tmp = tmp.resample("H").mean()
-    # tmp = tmp.interpolate(limit=24*7)
-    temp = tmp.loc[dates].values
-    for i in range(len(dates)):
-        x = depth_cor[i, :].astype(float)
-        y = temp[i, :].astype(float)
-        ind_no_nan = ~np.isnan(x + y)
-        x = x[ind_no_nan]
-        y = y[ind_no_nan]
-        x, indices = np.unique(x, return_index=True)
-        y = y[indices]
-        if len(x) < 2 or np.min(np.abs(x - depth)) > min_diff_to_depth:
-            continue
-        f = interp1d(x, y, kind, fill_value="extrapolate")
-        df_interp.iloc[i, 1] = np.minimum(f(depth), 0)
-
-    if df_interp.iloc[:5, 1].std() > 0.1:
-        df_interp.iloc[:5, 1] = np.nan
-
-    return df_interp
-
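The 10 m value produced above comes from a per-timestep vertical interpolation: pair each sensor's current depth with its temperature, drop NaNs and duplicate depths, and evaluate an interp1d at the target depth, skipping time steps where no sensor is within min_diff_to_depth of it. A single-timestep sketch (synthetic values):

    import numpy as np
    from scipy.interpolate import interp1d

    depth = 10.0
    x = np.array([2.0, 4.0, 6.0, 8.0, 9.5, np.nan])          # sensor depths (m)
    y = np.array([-12.0, -11.0, -10.5, -10.2, -10.1, -9.9])  # temperatures (deg C)

    ok = ~np.isnan(x + y)
    x, idx = np.unique(x[ok], return_index=True)
    y = y[ok][idx]

    if len(x) >= 2 and np.min(np.abs(x - depth)) <= 1.5:  # min_diff_to_depth
        f = interp1d(x, y, kind="linear", fill_value="extrapolate")
        t_10m = min(float(f(depth)), 0.0)  # cap at the melting point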
 
 def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/station_configurations/'):
     # saving the static value of 'lat','lon' or 'alt' stored in attribute
     # as it might be the only coordinate available for certain stations (e.g. bedrock)
diff --git a/src/pypromice/process/test.py b/src/pypromice/process/test.py
index 5c2ce6bb..f5e26601 100644
--- a/src/pypromice/process/test.py
+++ b/src/pypromice/process/test.py
@@ -40,7 +40,6 @@ def testAddAll(self):
                     datetime.datetime.now()-timedelta(days=365)]
         d.attrs['station_id']='TEST'
         d.attrs['level']='L2_test'
-        d.attrs['site_type']='ablation'
         meta = getMeta()
         d = addVars(d, v)
         d = addMeta(d, meta)
diff --git a/src/pypromice/resources/variables.csv b/src/pypromice/resources/variables.csv
index 5979d63d..77eaa7b8 100644
--- a/src/pypromice/resources/variables.csv
+++ b/src/pypromice/resources/variables.csv
@@ -88,21 +88,4 @@ rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corre
 wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4
 wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4
 wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4
-wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4
-z_surf_1,height_of_surface_1,"Surface height 1, relative to surface height at installation and calculated from boom (upper one if two)",m,modelResult,time,FALSE,L3,,,,all,0,0,1,4
-z_surf_2,height_of_surface_2,"Surface height 2, relative to surface height at installation and calculated from stake booms and pt",m,modelResult,time,FALSE,L3,,,,all,0,0,1,4
-z_surf_combined,height_of_surface_combined,"Surface height combined from multiple sensors, relative to ice surface height at installation",m,modelResult,time,FALSE,L3,,,,all,0,0,1,4
-z_ice_surf,height_of_ice_surface,"Ice surface height, relative to ice surface height at installation and calculated from pt_cor and z_stake",m,modelResult,time,FALSE,L3,,,,one-boom,0,0,1,4
-snow_height,height_of_snow,"Snow surface height, relative to ice surface",m,modelResult,time,FALSE,L3,0,,,one-boom,0,0,1,4
-d_t_i_1,depth_of_thermistor_1,Depth of thermistor 1,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
-d_t_i_2,depth_of_thermistor_2,Depth of thermistor 2,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
-d_t_i_3,depth_of_thermistor_3,Depth of thermistor 3,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
-d_t_i_4,depth_of_thermistor_4,Depth of thermistor 4,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
-d_t_i_5,depth_of_thermistor_5,Depth of thermistor 5,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
-d_t_i_6,depth_of_thermistor_6,Depth of thermistor 6,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
-d_t_i_7,depth_of_thermistor_7,Depth of thermistor 7,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
-d_t_i_8,depth_of_thermistor_8,Depth of thermistor 8,m,modelResult,time,FALSE,L3,-10,100,,all,0,0,1,4
-d_t_i_9,depth_of_thermistor_9,Depth of thermistor 9,m,modelResult,time,FALSE,L3,-10,100,,two-boom,0,0,1,4
-d_t_i_10,depth_of_thermistor_10,Depth of thermistor 10,m,modelResult,time,FALSE,L3,-10,100,,two-boom,0,0,1,4
-d_t_i_11,depth_of_thermistor_11,Depth of thermistor 11,m,modelResult,time,FALSE,L3,-10,100,,two-boom,0,0,1,4
-t_i_10m,10m_subsurface_temperature,10 m subsurface temperature,degrees_C,modelResult,time,FALSE,L3,-70,0,,all,0,0,1,4
+wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4
\ No newline at end of file
diff --git a/src/pypromice/test/test_config1.toml b/src/pypromice/test/test_config1.toml
index 6b9e2e95..09392c08 100644
--- a/src/pypromice/test/test_config1.toml
+++ b/src/pypromice/test/test_config1.toml
@@ -2,7 +2,6 @@ station_id = 'TEST1'
 logger_type = 'CR1000X'
 nodata = ['-999', 'NAN'] # if one is a string, all must be strings
 number_of_booms = 1 # 1-boom = PROMICE, 2-boom = GC-Net
-site_type = 'ablation'
 
 latitude = 79.91
 longitude = 24.09
diff --git a/src/pypromice/test/test_config2.toml b/src/pypromice/test/test_config2.toml
index 195cd2ac..8213d955 100644
--- a/src/pypromice/test/test_config2.toml
+++ b/src/pypromice/test/test_config2.toml
@@ -2,7 +2,6 @@ station_id = 'TEST2'
 logger_type = 'CR1000X'
 nodata = ['-999', 'NAN'] # if one is a string, all must be strings
 number_of_booms = 2
-site_type = 'accumulation'
 
 ['test_raw_transmitted2.txt']
 format = 'TX'