From 671cf92ce8230481b30299c12b2968ba4ce0ce2b Mon Sep 17 00:00:00 2001 From: Penny How Date: Thu, 24 Oct 2024 11:03:54 -0100 Subject: [PATCH 01/18] NUK_P tx format added (#310) --- src/pypromice/tx/payload_formats.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pypromice/tx/payload_formats.csv b/src/pypromice/tx/payload_formats.csv index b70f92e8..34f446a3 100644 --- a/src/pypromice/tx/payload_formats.csv +++ b/src/pypromice/tx/payload_formats.csv @@ -67,6 +67,7 @@ type,expected_values,format,description,flags,notes 90,48, tfffffffffffffffffffffffffffffffffgnefffffffffff,THE GC-NET VERSION!,don’t use,GC-NET stations 90,48,tfffffffffffffffffffffffffffffffffgnefffffffffff,THE GC-NET VERSION!,,GC-NET stations 95,46,tfffffffffffffffffffffffffffffffgnefffffffffff,THE GC-NET VERSION!,,GC-NET stations +115,47,tfffffffffffffffffffffffffffffffffgneffffffffff,PREMAS version,,PREMAS stations 220,29,tfffffffffffffffffffffffnefff,ZAMG Freya 2015 summer message,,ZAMG Freya aws 221,0,,,,ZAMG Freya aws 222,29,tfffffffffffffffffffffffnefff,ZAMG Freya 2015 winter message,,ZAMG Freya aws From 8610065ed280dbcf0de32bb84d3663d21910dd21 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Fri, 8 Nov 2024 09:41:52 +0100 Subject: [PATCH 02/18] update rh_*_cor to rh_'_wrt_ice_or_water --- src/pypromice/process/L1toL2.py | 19 +- src/pypromice/process/L2toL3.py | 442 +++++++++--------- src/pypromice/process/resample.py | 6 +- src/pypromice/process/value_clipping.py | 2 - .../resources/variable_aliases_GC-Net.csv | 4 +- src/pypromice/resources/variables.csv | 6 +- 6 files changed, 240 insertions(+), 239 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 8eaf8fe7..a8039017 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -102,16 +102,16 @@ def toL2( # calculating realtive humidity with regard to ice T_100 = _getTempK(T_0) - ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'], + ds['rh_u_wrt_ice_or_water'] = adjustHumidity(ds['rh_u'], ds['t_u'], T_0, T_100, ews, ei0) if ds.attrs['number_of_booms']==2: - ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'], + ds['rh_l_wrt_ice_or_water'] = adjustHumidity(ds['rh_l'], ds['t_l'], T_0, T_100, ews, ei0) if hasattr(ds,'t_i'): if ~ds['t_i'].isnull().all(): - ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'], + ds['rh_i_wrt_ice_or_water'] = adjustHumidity(ds['rh_i'], ds['t_i'], T_0, T_100, ews, ei0) # Determiune cloud cover for on-ice stations @@ -419,9 +419,12 @@ def calcTilt(tilt_x, tilt_y, deg2rad): return phi_sensor_rad, theta_sensor_rad -def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO figure out if T replicate is needed - '''Correct relative humidity using Groff & Gratch method, where values are - set when freezing and remain the original values when not freezing +def adjustHumidity(rh, T, T_0, T_100, ews, ei0): #TODO figure out if T replicate is needed + '''Adjust relative humidity so that values are given with respect to + saturation over ice in subfreezing conditions, and with respect to + saturation over water (as given by the instrument) above the melting + point temperature. Saturation water vapors are calculated after + Groff & Gratch method. 
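A minimal numpy sketch of the masking logic this renamed function applies, assuming e_s_wtr and e_s_ice are the saturation vapour pressures over water and over ice (hPa) already computed upstream with the Groff & Gratch expressions; the function name and plain-array inputs are illustrative, not the module's API:

import numpy as np

def adjust_rh_sketch(rh, t, e_s_wtr, e_s_ice):
    # Below the melting point (and above -100 C) the humidity reported by the
    # sensor with respect to water is rescaled to saturation over ice;
    # elsewhere it is returned unchanged, mirroring rh.where(~freezing, ...).
    freezing = (t < 0) & (t > -100)
    return np.where(freezing, rh * e_s_wtr / e_s_ice, rh)

Because e_s_ice is smaller than e_s_wtr at subfreezing temperatures, the adjusted value is larger than the raw reading, which is why the clipping range for rh_*_wrt_ice_or_water extends to 150 %.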
Parameters ---------- @@ -440,7 +443,7 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f Returns ------- - rh_cor : xarray.DataArray + rh_wrt_ice_or_water : xarray.DataArray Corrected relative humidity ''' # Convert to hPa (Groff & Gratch) @@ -458,7 +461,7 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f freezing = (T < 0) & (T > -100).values # Set to Groff & Gratch values when freezing, otherwise just rh - rh_cor = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice)) + rh_wrt_ice_or_water = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice)) return rh_cor diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 8cd0ea5c..d6d7859d 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -def toL3(L2, +def toL3(L2, data_adjustments_dir: Path, station_config={}, T_0=273.15): @@ -23,46 +23,46 @@ def toL3(L2, - smoothed and inter/extrapolated GPS coordinates - continuous surface height, ice surface height, snow height - thermistor depths - - + + Parameters ---------- L2 : xarray:Dataset L2 AWS data station_config : Dict - Dictionary containing the information necessary for the processing of + Dictionary containing the information necessary for the processing of L3 variables (relocation dates for coordinates processing, or thermistor string maintenance date for the thermistors depth) - T_0 : int + T_0 : int Freezing point temperature. Default is 273.15. ''' ds = L2 ds.attrs['level'] = 'L3' - T_100 = T_0+100 # Get steam point temperature as K - + T_100 = T_0+100 # Get steam point temperature as K + # Turbulent heat flux calculation if ('t_u' in ds.keys()) and \ ('p_u' in ds.keys()) and \ - ('rh_u_cor' in ds.keys()): + ('rh_u_wrt_ice_or_water' in ds.keys()): # Upper boom bulk calculation T_h_u = ds['t_u'].copy() # Copy for processing p_h_u = ds['p_u'].copy() - RH_cor_h_u = ds['rh_u_cor'].copy() - - q_h_u = calculate_specific_humidity(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u) # Calculate specific humidity + rh_h_u_wrt_ice_or_water = ds['rh_u_wrt_ice_or_water'].copy() + + q_h_u = calculate_specific_humidity(T_0, T_100, T_h_u, p_h_u, rh_h_u_wrt_ice_or_water) # Calculate specific humidity if ('wspd_u' in ds.keys()) and \ ('t_surf' in ds.keys()) and \ ('z_boom_u' in ds.keys()): WS_h_u = ds['wspd_u'].copy() Tsurf_h = ds['t_surf'].copy() # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters? 
- z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer - z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer - - if not ds.attrs['bedrock']: + z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer + z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer + + if not ds.attrs['bedrock']: SHF_h_u, LHF_h_u= calculate_tubulent_heat_fluxes(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes - z_WS_u, z_T_u, q_h_u, p_h_u) - + z_WS_u, z_T_u, q_h_u, p_h_u) + ds['dshf_u'] = (('time'), SHF_h_u.data) ds['dlhf_u'] = (('time'), LHF_h_u.data) else: @@ -71,80 +71,80 @@ def toL3(L2, q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg ds['qh_u'] = (('time'), q_h_u.data) else: - logger.info('t_u, p_u or rh_u_cor missing, cannot calulate tubrulent heat fluxes') + logger.info('t_u, p_u or rh_u_wrt_ice_or_water missing, cannot calulate tubrulent heat fluxes') # Lower boom bulk calculation if ds.attrs['number_of_booms']==2: if ('t_l' in ds.keys()) and \ ('p_l' in ds.keys()) and \ - ('rh_l_cor' in ds.keys()): + ('rh_l_wrt_ice_or_water' in ds.keys()): T_h_l = ds['t_l'].copy() # Copy for processing - p_h_l = ds['p_l'].copy() - RH_cor_h_l = ds['rh_l_cor'].copy() + p_h_l = ds['p_l'].copy() + rh_h_l_wrt_ice_or_water = ds['rh_l_wrt_ice_or_water'].copy() - q_h_l = calculate_specific_humidity(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l) # Calculate sp.humidity + q_h_l = calculate_specific_humidity(T_0, T_100, T_h_l, p_h_l, rh_h_l_wrt_ice_or_water) # Calculate sp.humidity if ('wspd_l' in ds.keys()) and \ ('t_surf' in ds.keys()) and \ ('z_boom_l' in ds.keys()): - z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W - z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer - WS_h_l = ds['wspd_l'].copy() - if not ds.attrs['bedrock']: - SHF_h_l, LHF_h_l= calculate_tubulent_heat_fluxes(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes - z_WS_l, z_T_l, q_h_l, p_h_l) - + z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W + z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer + WS_h_l = ds['wspd_l'].copy() + if not ds.attrs['bedrock']: + SHF_h_l, LHF_h_l= calculate_tubulent_heat_fluxes(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes + z_WS_l, z_T_l, q_h_l, p_h_l) + ds['dshf_l'] = (('time'), SHF_h_l.data) ds['dlhf_l'] = (('time'), LHF_h_l.data) else: logger.info('wspd_l, t_surf or z_boom_l missing, cannot calulate tubrulent heat fluxes') - + q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg ds['qh_l'] = (('time'), q_h_l.data) else: - logger.info('t_l, p_l or rh_l_cor missing, cannot calulate tubrulent heat fluxes') + logger.info('t_l, p_l or rh_l_wrt_ice_or_water missing, cannot calulate tubrulent heat fluxes') if len(station_config)==0: logger.warning('\n***\nThe station configuration file is missing or improperly passed to pypromice. 
Some processing steps might fail.\n***\n') - - # Smoothing and inter/extrapolation of GPS coordinates + + # Smoothing and inter/extrapolation of GPS coordinates for var in ['gps_lat', 'gps_lon', 'gps_alt']: ds[var.replace('gps_','')] = ('time', gps_coordinate_postprocessing(ds, var, station_config)) - + # processing continuous surface height, ice surface height, snow height try: ds = process_surface_height(ds, data_adjustments_dir, station_config) except Exception as e: logger.error("Error processing surface height at %s"%L2.attrs['station_id']) logging.error(e, exc_info=True) - + # making sure dataset has the attributes contained in the config files if 'project' in station_config.keys(): ds.attrs['project'] = station_config['project'] else: logger.error('No project info in station_config. Using \"PROMICE\".') ds.attrs['project'] = "PROMICE" - + if 'location_type' in station_config.keys(): ds.attrs['location_type'] = station_config['location_type'] else: logger.error('No project info in station_config. Using \"ice sheet\".') ds.attrs['location_type'] = "ice sheet" - + return ds def process_surface_height(ds, data_adjustments_dir, station_config={}): """ - Process surface height data for different site types and create + Process surface height data for different site types and create surface height variables. Parameters ---------- ds : xarray.Dataset The dataset containing various measurements and attributes including - 'site_type' which determines the type of site (e.g., 'ablation', - 'accumulation', 'bedrock') and other relevant data variables such as + 'site_type' which determines the type of site (e.g., 'ablation', + 'accumulation', 'bedrock') and other relevant data variables such as 'z_boom_u', 'z_stake', 'z_pt_cor', etc. Returns @@ -164,18 +164,18 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): if ds.z_stake.notnull().any(): first_valid_index = ds.time.where((ds.z_stake + ds.z_boom_u).notnull(), drop=True).data[0] ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] - + # Use corrected point data if available if 'z_pt_cor' in ds.data_vars: ds['z_ice_surf'] = ('time', ds['z_pt_cor'].data) - + else: # Calculate surface heights for other site types first_valid_index = ds.time.where(ds.z_boom_u.notnull(), drop=True).data[0] ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u'] if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any(): first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0] - ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] + ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] if 'z_boom_l' in ds.data_vars: # need a combine first because KAN_U switches from having a z_stake # to having a z_boom_l @@ -191,7 +191,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): (ds['z_surf_combined'], ds['z_ice_surf'], ds['z_surf_1_adj'], ds['z_surf_2_adj']) = combine_surface_height(df_in, ds.attrs['site_type']) - + if ds.attrs['site_type'] == 'ablation': # Calculate rolling minimum for ice surface height and snow height @@ -217,22 +217,22 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): z_ice_surf = (ts_interpolated .rolling('1D', center=True, min_periods=1) .median()) - + z_ice_surf = z_ice_surf.loc[ds.time] # here we make sure that the periods where both z_stake and z_pt are # missing are also missing in z_ice_surf msk = ds['z_ice_surf'].notnull() | 
ds['z_surf_2_adj'].notnull() - z_ice_surf = z_ice_surf.where(msk) - + z_ice_surf = z_ice_surf.where(msk) + # taking running minimum to get ice z_ice_surf = z_ice_surf.cummin() # filling gaps only if they are less than a year long and if values on both # sides are less than 0.01 m appart - + # Forward and backward fill to identify bounds of gaps df_filled = z_ice_surf.fillna(method='ffill').fillna(method='bfill') - + # Identify gaps and their start and end dates gaps = pd.DataFrame(index=z_ice_surf[z_ice_surf.isna()].index) gaps['prev_value'] = df_filled.shift(1) @@ -241,17 +241,17 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): gaps['gap_end'] = gaps.index.to_series().shift(-1) gaps['gap_duration'] = (gaps['gap_end'] - gaps['gap_start']).dt.days gaps['value_diff'] = (gaps['next_value'] - gaps['prev_value']).abs() - + # Determine which gaps to fill mask = (gaps['gap_duration'] < 365) & (gaps['value_diff'] < 0.01) gaps_to_fill = gaps[mask].index - + # Fill gaps in the original Series z_ice_surf.loc[gaps_to_fill] = df_filled.loc[gaps_to_fill] - + # bringing the variable into the dataset ds['z_ice_surf'] = z_ice_surf - + ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf']) ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf']) ds['z_ice_surf'] = ds['z_ice_surf'].where(ds.snow_height.notnull()) @@ -274,43 +274,43 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): station_config) for var in df_out.columns: ds[var] = ('time', df_out[var].values) - + return ds def combine_surface_height(df, site_type, threshold_ablation = -0.0002): '''Combines the data from three sensor: the two sonic rangers and the pressure transducer, to recreate the surface height, the ice surface height and the snow depth through the years. For the accumulation sites, it is - only the average of the two sonic rangers (after manual adjustments to - correct maintenance shifts). For the ablation sites, first an ablation + only the average of the two sonic rangers (after manual adjustments to + correct maintenance shifts). For the ablation sites, first an ablation period is estimated each year (either the period when z_pt_cor decreases - or JJA if no better estimate) then different adjustmnents are conducted + or JJA if no better estimate) then different adjustmnents are conducted to stitch the three time series together: z_ice_surface (adjusted from z_pt_cor) or if unvailable, z_surf_2 (adjusted from z_stake) are used in the ablation period while an average of z_surf_1 and z_surf_2 - are used otherwise, after they are being adjusted to z_ice_surf at the end + are used otherwise, after they are being adjusted to z_ice_surf at the end of the ablation season. - + Parameters ---------- df : pandas.dataframe Dataframe with datetime index and variables z_surf_1, z_surf_2 and z_ice_surf - site_type : str + site_type : str Either 'accumulation' or 'ablation' threshold_ablation : float Threshold to which a z_pt_cor hourly decrease is compared. If the decrease is higher, then there is ablation. 
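A sketch of that ablation test, assuming a time-indexed hourly pandas Series of pressure-transducer heights; the function name and the simplified one-day rolling median are illustrative stand-ins for the resampling and smoothing done in the function body:

import numpy as np
import pandas as pd

def ablation_mask_sketch(z_pt: pd.Series, threshold_ablation: float = -0.0002) -> np.ndarray:
    # Ablation is flagged where the smoothed ice surface drops faster than the
    # threshold, and only during June-September.
    smoothed = z_pt.rolling('1D', min_periods=1).median()
    dropping = smoothed.diff().values < threshold_ablation
    summer = np.isin(smoothed.index.month, [6, 7, 8, 9])
    return np.logical_and(dropping, summer)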
''' logger.info('Combining surface height') - + if 'z_surf_2' not in df.columns: logger.info('-> did not find z_surf_2') df["z_surf_2"] = df["z_surf_1"].values*np.nan - + if 'z_ice_surf' not in df.columns: logger.info('-> did not find z_ice_surf') df["z_ice_surf"] = df["z_surf_1"].values*np.nan - + if site_type in ['accumulation', 'bedrock']: logger.info('-> no z_pt or accumulation site: averaging z_surf_1 and z_surf_2') df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72)).values @@ -326,7 +326,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): df['z_surf_combined'] = df.z_surf_2_adj.values # df["z_surf_combined"] = hampel(df["z_surf_combined"].interpolate(limit=72)).values - return (df['z_surf_combined'], df["z_surf_combined"]*np.nan, + return (df['z_surf_combined'], df["z_surf_combined"]*np.nan, df["z_surf_1_adj"], df["z_surf_2_adj"]) else: @@ -340,7 +340,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72), k=24, t0=5).values # defining ice ablation period from the decrease of a smoothed version of z_pt - # meaning when smoothed_z_pt.diff() < threshold_ablation + # meaning when smoothed_z_pt.diff() < threshold_ablation # first smoothing smoothed_PT = (df['z_ice_surf'] .resample('h') @@ -354,14 +354,14 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): # smoothed_PT.loc[df.z_ice_surf.isnull()] = np.nan # logical index where ablation is detected - ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation, + ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation, np.isin(smoothed_PT.diff().index.month, [6, 7, 8, 9])) # finding the beginning and end of each period with True idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2) idx[:, 1] -= 1 - + # fill small gaps in the ice ablation periods. for i in range(len(idx)-1): ind = idx[i] @@ -371,20 +371,20 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): # season if df.index[ind_next[0]]-df.index[ind[1]]= period_start) & (df.index < period_end) ind_ablation[exclusion_period] = False - + hs1=df["z_surf_1_adj"].interpolate(limit=24*2).copy() hs2=df["z_surf_2_adj"].interpolate(limit=24*2).copy() z=df["z_ice_surf_adj"].interpolate(limit=24*2).copy() @@ -397,9 +397,9 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): if any(~np.isnan(hs2.iloc[:24*7])) & any(~np.isnan(hs1.iloc[:24*7])): hs2 = hs2 + hs1.iloc[:24*7].mean() - hs2.iloc[:24*7].mean() - + if any(~np.isnan(z.iloc[:24*7])): - # expressing ice surface height relative to its mean value in the + # expressing ice surface height relative to its mean value in the # first week of the record z = z - z.iloc[:24*7].mean() elif z.notnull().any(): @@ -414,16 +414,16 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): z.first_valid_index():(z.first_valid_index()+pd.to_timedelta('14D')) ].mean() + hs1.iloc[:24*7].mean() else: - # if there is more than a year (actually 251 days) between the + # if there is more than a year (actually 251 days) between the # initiation of the AWS and the installation of the pressure transducer # we remove the intercept in the pressure transducer data. - # Removing the intercept + # Removing the intercept # means that we consider the ice surface height at 0 when the AWS # is installed, and not when the pressure transducer is installed. 
Y = z.iloc[:].values.reshape(-1, 1) - X = z.iloc[~np.isnan(Y)].index.astype(np.int64).values.reshape(-1, 1) - Y = Y[~np.isnan(Y)] - linear_regressor = LinearRegression() + X = z.iloc[~np.isnan(Y)].index.astype(np.int64).values.reshape(-1, 1) + Y = Y[~np.isnan(Y)] + linear_regressor = LinearRegression() linear_regressor.fit(X, Y) Y_pred = linear_regressor.predict(z.index.astype(np.int64).values.reshape(-1, 1) ) z = z-Y_pred[0] @@ -444,7 +444,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): ind_abl_yr = np.logical_and(ind_yr, df.index.month.isin([6,7,8])) ind_ablation[ind_yr] = ind_abl_yr[ind_yr] logger.debug(str(y)+' no z_ice_surf, just using JJA') - + else: logger.debug(str(y)+ ' derived from z_ice_surf') @@ -494,7 +494,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): if all(np.isnan(z_jja)) and any(~np.isnan(hs2_jja)): # if there is no PT for a given year, but there is some hs2 - # then z will be adjusted to hs2 next time it is available + # then z will be adjusted to hs2 next time it is available hs2_ref = 1 if all(np.isnan(z_winter)) and all(np.isnan(hs2_winter)): @@ -518,7 +518,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): # in some instances, the PT data is available but no ablation # is recorded, then hs2 remains the reference during that time. - # When eventually there is ablation, then we need to find the + # When eventually there is ablation, then we need to find the # first index in these preceding ablation-free years # the shift will be applied back from this point # first_index = z[:z[str(y)].first_valid_index()].isnull().iloc[::-1].idxmax() @@ -538,13 +538,13 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): else: logger.debug('adjusting z to hs1') first_index = hs2.iloc[ind_start[i]:].first_valid_index() - z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index] + z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index] else: - logger.debug('adjusting z to hs1') - z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index] + logger.debug('adjusting z to hs1') + z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index] hs2_ref = 0 # from now on PT is the reference - - + + else: # if z_pt is the reference and there is some ablation # then hs1 and hs2 are adjusted to z_pt @@ -560,7 +560,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): [first_index, hs2_year.first_valid_index()])) - # if PT, hs1 and hs2 are all nan until station is reactivated, then + # if PT, hs1 and hs2 are all nan until station is reactivated, then first_day_of_year = pd.to_datetime(str(y)+'-01-01') if len(z[first_day_of_year:first_index-pd.to_timedelta('1D')])>0: @@ -568,8 +568,8 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): hs1[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all() & \ hs2[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all(): if (~np.isnan(np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])) \ - and ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]))): - logger.debug(' ======= adjusting hs1 and hs2 to z_pt') + and ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]))): + logger.debug(' ======= adjusting hs1 and hs2 to z_pt') if ~np.isnan(np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) ): hs1[first_index:] = hs1[first_index:] \ - 
np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) \ @@ -580,23 +580,23 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): + np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')]) # adjustment taking place at the end of the ablation period - if (ind_end[i] != -999): + if (ind_end[i] != -999): # if y == 2023: # import pdb; pdb.set_trace() # if there's ablation and # if there are PT data available at the end of the melt season if z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)].notnull().any(): logger.debug('adjusting hs2 to z') - # then we adjust hs2 to the end-of-ablation z + # then we adjust hs2 to the end-of-ablation z # first trying at the end of melt season - if ~np.isnan(np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])): + if ~np.isnan(np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])): logger.debug('using end of melt season') hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \ np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) + \ np.nanmean(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) # if not possible, then trying the end of the following accumulation season elif (i+1 < len(ind_start)): - if ind_start[i+1]!=-999 and any(~np.isnan(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])): + if ind_start[i+1]!=-999 and any(~np.isnan(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])): logger.debug('using end of accumulation season') hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \ np.nanmean(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \ @@ -614,7 +614,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): if any(~np.isnan(hs1_following_winter)): logger.debug('to hs1') # then we adjust hs1 to hs2 during the accumulation area - # adjustment is done so that the mean hs1 and mean hs2 match + # adjustment is done so that the mean hs1 and mean hs2 match # for the period when both are available hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan @@ -628,12 +628,12 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy() # adjusting hs1 to hs2 (no ablation case) if any(~np.isnan(hs1_following_winter)): - logger.debug('adjusting hs1') + logger.debug('adjusting hs1') # and if there are some hs2 during the accumulation period if any(~np.isnan(hs2_following_winter)): logger.debug('to hs2') # then we adjust hs1 to hs2 during the accumulation area - # adjustment is done so that the mean hs1 and mean hs2 match + # adjustment is done so that the mean hs1 and mean hs2 match # for the period when both are available hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan @@ -652,7 +652,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): if any(~np.isnan(hs2_following_winter)): logger.debug('to hs2, minimizing winter difference') # then we adjust hs1 to hs2 during the accumulation area - # adjustment is done so that the mean hs1 and mean hs2 match + # adjustment is done so that the mean hs1 and mean hs2 match # for the period when both are available tmp1 = hs1.iloc[ind_end[i]:min(len(hs1),ind_end[i]+24*30*9)].copy() tmp2 = hs2.iloc[ind_end[i]:min(len(hs2),ind_end[i]+24*30*9)].copy() @@ -670,15 +670,15 @@ def combine_surface_height(df, site_type, 
threshold_ablation = -0.0002): # if no hs2, then use PT data available at the end of the melt season elif np.any(~np.isnan(z.iloc[(ind_end[i]-24*14):(ind_end[i]+24*7)])): logger.debug('to z') - # then we adjust hs2 to the end-of-ablation z + # then we adjust hs2 to the end-of-ablation z # first trying at the end of melt season - if ~np.isnan(np.nanmean(hs1.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)])): + if ~np.isnan(np.nanmean(hs1.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)])): logger.debug('using end of melt season') hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \ np.nanmean(hs1.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)]) + \ np.nanmean(z.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)]) # if not possible, then trying the end of the following accumulation season - elif ind_start[i+1]!=-999 and any(~np.isnan(hs1.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)])): + elif ind_start[i+1]!=-999 and any(~np.isnan(hs1.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)])): logger.debug('using end of accumulation season') hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \ np.nanmean(hs1.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)]) + \ @@ -686,7 +686,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): elif any(~np.isnan(hs2_year)): logger.debug('to the last value of hs2') # then we adjust hs1 to hs2 during the accumulation area - # adjustment is done so that the mean hs1 and mean hs2 match + # adjustment is done so that the mean hs1 and mean hs2 match # for the period when both are available half_span = pd.to_timedelta('7D') tmp1 = hs1_year.loc[(hs2_year.last_valid_index()-half_span):(hs2_year.last_valid_index()+half_span)].copy() @@ -702,25 +702,25 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): df["z_surf_combined"] = np.nan # in winter, both SR1 and SR2 are used - df["z_surf_combined"] = df["z_surf_2_adj"].interpolate(limit=72).values - + df["z_surf_combined"] = df["z_surf_2_adj"].interpolate(limit=72).values + # in ablation season we use SR2 instead of the SR1&2 average # here two options: # 1) we ignore the SR1 and only use SR2 - # 2) we use SR1 when SR2 is not available (commented) + # 2) we use SR1 when SR2 is not available (commented) # the later one can cause jumps when SR2 starts to be available few days after SR1 data_update = df[["z_surf_1_adj", "z_surf_2_adj"]].mean(axis=1).values - - ind_update = ~ind_ablation + + ind_update = ~ind_ablation #ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update)) - df.loc[ind_update,"z_surf_combined"] = data_update[ind_update] + df.loc[ind_update,"z_surf_combined"] = data_update[ind_update] # in ablation season we use pressure transducer over all other options - data_update = df[ "z_ice_surf_adj"].interpolate(limit=72).values + data_update = df[ "z_ice_surf_adj"].interpolate(limit=72).values ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update)) - df.loc[ind_update,"z_surf_combined"] = data_update[ind_update] - + df.loc[ind_update,"z_surf_combined"] = data_update[ind_update] + logger.info('surface height combination finished') return df['z_surf_combined'], df["z_ice_surf_adj"], df["z_surf_1_adj"], df["z_surf_2_adj"] @@ -730,7 +730,7 @@ def hampel(vals_orig, k=7*24, t0=15): k: size of window (including the sample; 7 is equal to 3 on either side of value) ''' #Make copy so original not edited - vals=vals_orig.copy() + vals=vals_orig.copy() #Hampel Filter L= 1.4826 
rolling_median=vals.rolling(k).median() @@ -744,19 +744,19 @@ def hampel(vals_orig, k=7*24, t0=15): def get_thermistor_depth(df_in, site, station_config): - '''Calculates the depth of the thermistors through time based on their + '''Calculates the depth of the thermistors through time based on their installation depth (collected in a google sheet) and on the change of surface height: instruments getting buried under new snow or surfacing due to ablation. There is a potential for additional filtering of thermistor data for surfaced (or just noisy) thermistors, but that is currently deactivated because slow. - + Parameters ---------- df_in : pandas:dataframe - dataframe containing the ice/firn temperature t_i_* as well as the + dataframe containing the ice/firn temperature t_i_* as well as the combined surface height z_surf_combined site : str - stid, so that maintenance date and sensor installation depths can be found + stid, so that maintenance date and sensor installation depths can be found in database station_config : dict potentially containing the key string_maintenance @@ -768,17 +768,17 @@ def get_thermistor_depth(df_in, site, station_config): # Add more entries as needed ] ''' - + temp_cols_name = ['t_i_'+str(i) for i in range(12) if 't_i_'+str(i) in df_in.columns] num_therm = len(temp_cols_name) - depth_cols_name = ['d_t_i_'+str(i) for i in range(1,num_therm+1)] - + depth_cols_name = ['d_t_i_'+str(i) for i in range(1,num_therm+1)] + if df_in['z_surf_combined'].isnull().all(): logger.info('No valid surface height at '+site+', cannot calculate thermistor depth') df_in[depth_cols_name + ['t_i_10m']] = np.nan else: logger.info('Calculating thermistor depth') - + # Convert maintenance_info to DataFrame for easier manipulation maintenance_string = pd.DataFrame( station_config.get("string_maintenance",[]), @@ -786,14 +786,14 @@ def get_thermistor_depth(df_in, site, station_config): ) maintenance_string["date"] = pd.to_datetime(maintenance_string["date"]) maintenance_string = maintenance_string.sort_values(by='date', ascending=True) - + if num_therm == 8: ini_depth = [1, 2, 3, 4, 5, 6, 7, 10] else: ini_depth = [0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5] df_in[depth_cols_name] = np.nan - + # filtering the surface height surface_height = df_in["z_surf_combined"].copy() ind_filter = surface_height.rolling(window=14, center=True).var() > 0.1 @@ -801,7 +801,7 @@ def get_thermistor_depth(df_in, site, station_config): surface_height[ind_filter] = np.nan df_in["z_surf_combined"] = surface_height.values z_surf_interp = df_in["z_surf_combined"].interpolate() - + # first initialization of the depths for i, col in enumerate(depth_cols_name): df_in[col] = ( @@ -809,18 +809,18 @@ def get_thermistor_depth(df_in, site, station_config): + z_surf_interp.values - z_surf_interp[z_surf_interp.first_valid_index()] ) - + # reseting depth at maintenance if len(maintenance_string.date) == 0: logger.info("No maintenance at "+site) - + for date in maintenance_string.date: if date > z_surf_interp.last_valid_index(): continue new_depth = maintenance_string.loc[ maintenance_string.date == date ].installation_depths.values[0] - + for i, col in enumerate(depth_cols_name[:len(new_depth)]): tmp = df_in[col].copy() tmp.loc[date:] = ( @@ -831,11 +831,11 @@ def get_thermistor_depth(df_in, site, station_config): ] ) df_in[col] = tmp.values - + # % Filtering thermistor data for i in range(len(temp_cols_name)): tmp = df_in[temp_cols_name[i]].copy() - + # variance filter # ind_filter = ( # df_in[temp_cols_name[i]] @@ 
-850,7 +850,7 @@ def get_thermistor_depth(df_in, site, station_config): # ind_filter.loc[np.isin(month, [5, 6, 7])] = False # if any(ind_filter): # tmp.loc[ind_filter] = np.nan - + # before and after maintenance adaptation filter if len(maintenance_string.date) > 0: for date in maintenance_string.date: @@ -866,7 +866,7 @@ def get_thermistor_depth(df_in, site, station_config): ) < np.timedelta64(7, "D") if any(ind_adapt): tmp.loc[ind_adapt] = np.nan - + # surfaced thermistor ind_pos = df_in[depth_cols_name[i]] < 0.1 if any(ind_pos): @@ -874,7 +874,7 @@ def get_thermistor_depth(df_in, site, station_config): # copying the filtered values to the original table df_in[temp_cols_name[i]] = tmp.values - + # removing negative depth df_in.loc[df_in[depth_cols_name[i]]<0, depth_cols_name[i]] = np.nan logger.info("interpolating 10 m firn/ice temperature") @@ -885,7 +885,7 @@ def get_thermistor_depth(df_in, site, station_config): kind="linear", min_diff_to_depth=1.5, ).set_index('date').values - + # filtering ind_pos = df_in["t_i_10m"] > 0.1 ind_low = df_in["t_i_10m"] < -70 @@ -897,12 +897,12 @@ def get_thermistor_depth(df_in, site, station_config): def interpolate_temperature(dates, depth_cor, temp, depth=10, min_diff_to_depth=2, kind="quadratic"): - '''Calculates the depth of the thermistors through time based on their + '''Calculates the depth of the thermistors through time based on their installation depth (collected in a google sheet) and on the change of surface height: instruments getting buried under new snow or surfacing due to ablation. There is a potential for additional filtering of thermistor data for surfaced (or just noisy) thermistors, but that is currently deactivated because slow. - + Parameters ---------- dates : numpy.array @@ -959,20 +959,20 @@ def gps_coordinate_postprocessing(ds, var, station_config={}): static_value = float(ds.attrs[coord_names[var_out]]) else: static_value = np.nan - + # if there is no gps observations, then we use the static value repeated # for each time stamp - if var not in ds.data_vars: + if var not in ds.data_vars: print('no',var,'at', ds.attrs['station_id']) return np.ones_like(ds['t_u'].data)*static_value - + if ds[var].isnull().all(): print('no',var,'at',ds.attrs['station_id']) return np.ones_like(ds['t_u'].data)*static_value - + # Extract station relocations from the config dict station_relocations = station_config.get("station_relocation", []) - + # Convert the ISO8601 strings to pandas datetime objects breaks = [pd.to_datetime(date_str) for date_str in station_relocations] if len(breaks)==0: @@ -981,16 +981,16 @@ def gps_coordinate_postprocessing(ds, var, station_config={}): logger.info('processing '+var+' with relocation on ' + ', '.join([br.strftime('%Y-%m-%dT%H:%M:%S') for br in breaks])) return piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks) - + def piecewise_smoothing_and_interpolation(data_series, breaks): - '''Smoothes, inter- or extrapolate the GPS observations. The processing is - done piecewise so that each period between station relocations are done - separately (no smoothing of the jump due to relocation). Piecewise linear - regression is then used to smooth the available observations. Then this - smoothed curve is interpolated linearly over internal gaps. Eventually, this - interpolated curve is extrapolated linearly for timestamps before the first + '''Smoothes, inter- or extrapolate the GPS observations. 
The processing is + done piecewise so that each period between station relocations are done + separately (no smoothing of the jump due to relocation). Piecewise linear + regression is then used to smooth the available observations. Then this + smoothed curve is interpolated linearly over internal gaps. Eventually, this + interpolated curve is extrapolated linearly for timestamps before the first valid measurement and after the last valid measurement. - + Parameters ---------- data_series : pandas.Series @@ -998,7 +998,7 @@ def piecewise_smoothing_and_interpolation(data_series, breaks): breaks: list List of timestamps of station relocation. First and last item should be None so that they can be used in slice(breaks[i], breaks[i+1]) - + Returns ------- np.ndarray @@ -1009,44 +1009,44 @@ def piecewise_smoothing_and_interpolation(data_series, breaks): _inferred_series = [] for i in range(len(breaks) - 1): df = data_series.loc[slice(breaks[i], breaks[i+1])] - + # Drop NaN values and calculate the number of segments based on valid data df_valid = df.dropna() - if df_valid.shape[0] > 2: + if df_valid.shape[0] > 2: # Fit linear regression model to the valid data range x = pd.to_numeric(df_valid.index).values.reshape(-1, 1) y = df_valid.values.reshape(-1, 1) - + model = LinearRegression() model.fit(x, y) - + # Predict using the model for the entire segment range x_pred = pd.to_numeric(df.index).values.reshape(-1, 1) - + y_pred = model.predict(x_pred) df = pd.Series(y_pred.flatten(), index=df.index) # adds to list the predicted values for the current segment _inferred_series.append(df) - + df_all = pd.concat(_inferred_series) - + # Fill internal gaps with linear interpolation df_all = df_all.interpolate(method='linear', limit_area='inside') - + # Remove duplicate indices and return values as numpy array df_all = df_all[~df_all.index.duplicated(keep='last')] return df_all.values - -def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, - kappa=0.4, WS_lim=1., z_0=0.001, g=9.82, es_0=6.1071, eps=0.622, - gamma=16., L_sub=2.83e6, L_dif_max=0.01, c_pd=1005., aa=0.7, - bb=0.75, cc=5., dd=0.35, R_d=287.05): - '''Calculate latent and sensible heat flux using the bulk calculation - method - + +def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, + kappa=0.4, WS_lim=1., z_0=0.001, g=9.82, es_0=6.1071, eps=0.622, + gamma=16., L_sub=2.83e6, L_dif_max=0.01, c_pd=1005., aa=0.7, + bb=0.75, cc=5., dd=0.35, R_d=287.05): + '''Calculate latent and sensible heat flux using the bulk calculation + method + Parameters ---------- - T_0 : int + T_0 : int Freezing point temperature T_h : xarray.DataArray Air temperature @@ -1065,55 +1065,55 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, p_h : xarray.DataArray Air pressure kappa : int - Von Karman constant (0.35-0.42). Default is 0.4. + Von Karman constant (0.35-0.42). Default is 0.4. WS_lim : int - Default is 1. + Default is 1. z_0 : int - Aerodynamic surface roughness length for momention, assumed constant + Aerodynamic surface roughness length for momention, assumed constant for all ice/snow surfaces. Default is 0.001. - g : int - Gravitational acceleration (m/s2). Default is 9.82. - es_0 : int - Saturation vapour pressure at the melting point (hPa). Default is 6.1071. - eps : int + g : int + Gravitational acceleration (m/s2). Default is 9.82. + es_0 : int + Saturation vapour pressure at the melting point (hPa). Default is 6.1071. 
+ eps : int Ratio of molar masses of vapor and dry air (0.622). gamma : int Flux profile correction (Paulson & Dyer). Default is 16.. - L_sub : int + L_sub : int Latent heat of sublimation (J/kg). Default is 2.83e6. - L_dif_max : int - Default is 0.01. + L_dif_max : int + Default is 0.01. c_pd : int Specific heat of dry air (J/kg/K). Default is 1005.. - aa : int - Flux profile correction constants (Holtslag & De Bruin '88). Default is + aa : int + Flux profile correction constants (Holtslag & De Bruin '88). Default is 0.7. - bb : int - Flux profile correction constants (Holtslag & De Bruin '88). Default is + bb : int + Flux profile correction constants (Holtslag & De Bruin '88). Default is 0.75. cc : int - Flux profile correction constants (Holtslag & De Bruin '88). Default is + Flux profile correction constants (Holtslag & De Bruin '88). Default is 5. dd : int - Flux profile correction constants (Holtslag & De Bruin '88). Default is + Flux profile correction constants (Holtslag & De Bruin '88). Default is 0.35. - R_d : int + R_d : int Gas constant of dry air. Default is 287.05. - + Returns ------- SHF_h : xarray.DataArray Sensible heat flux LHF_h : xarray.DataArray Latent heat flux - ''' - rho_atm = 100 * p_h / R_d / (T_h + T_0) # Calculate atmospheric density - nu = calculate_viscosity(T_h, T_0, rho_atm) # Calculate kinematic viscosity - + ''' + rho_atm = 100 * p_h / R_d / (T_h + T_0) # Calculate atmospheric density + nu = calculate_viscosity(T_h, T_0, rho_atm) # Calculate kinematic viscosity + SHF_h = xr.zeros_like(T_h) # Create empty xarrays LHF_h = xr.zeros_like(T_h) L = xr.full_like(T_h, 1E5) - + u_star = kappa * WS_h.where(WS_h>0) / np.log(z_WS / z_0) # Rough surfaces, from Smeets & Van den Broeke 2008 Re = u_star * z_0 / nu z_0h = u_star @@ -1126,12 +1126,12 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, * (1 - (Tsurf_h + T_0) / T_0) + np.log10(es_0)) q_surf = eps * es_ice_surf / (p_h - (1 - eps) * es_ice_surf) - theta = T_h + z_T *g / c_pd + theta = T_h + z_T *g / c_pd stable = (theta > Tsurf_h) & (WS_h > WS_lim) unstable = (theta < Tsurf_h) & (WS_h > WS_lim) #TODO: check if unstable = ~stable? 
And if not why not - #no_wind = (WS_h <= WS_lim) + #no_wind = (WS_h <= WS_lim) # Calculate stable stratification - for i in np.arange(0,31): + for i in np.arange(0,31): psi_m1 = -(aa* z_0/L[stable] + bb*( z_0/L[stable]-cc/dd)*np.exp(-dd* z_0/L[stable]) + bb*cc/dd) psi_m2 = -(aa*z_WS[stable]/L[stable] + bb*(z_WS[stable]/L[stable]-cc/dd)*np.exp(-dd*z_WS[stable]/L[stable]) + bb*cc/dd) psi_h1 = -(aa*z_0h[stable]/L[stable] + bb*(z_0h[stable]/L[stable]-cc/dd)*np.exp(-dd*z_0h[stable]/L[stable]) + bb*cc/dd) @@ -1139,8 +1139,8 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, u_star[stable] = kappa*WS_h[stable]/(np.log(z_WS[stable]/z_0)-psi_m2+psi_m1) Re[stable] = u_star[stable]*z_0/nu[stable] z_0h[stable] = z_0*np.exp(1.5-0.2*np.log(Re[stable])-0.11*(np.log(Re[stable]))**2) - - # If n_elements(where(z_0h[stable] < 1e-6)) get 1 then + + # If n_elements(where(z_0h[stable] < 1e-6)) get 1 then # z_0h[stable[where(z_0h[stable] < 1e-6)]] = 1e-6 z_0h[stable][z_0h[stable] < 1E-6] == 1E-6 th_star = kappa \ @@ -1156,12 +1156,12 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, * (1 + ((1-eps) / eps) * q_h[stable]) \ / (g * kappa * th_star * (1 + ((1-eps)/eps) * q_star)) L_dif = np.abs((L_prev-L[stable])/L_prev) - + # If n_elements(where(L_dif > L_dif_max)) eq 1 then break if np.all(L_dif <= L_dif_max): break - # Calculate unstable stratification + # Calculate unstable stratification if len(unstable) > 0: for i in np.arange(0,21): x1 = (1-gamma*z_0 /L[unstable])**0.25 @@ -1176,8 +1176,8 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, Re[unstable] = u_star[unstable]*z_0/nu[unstable] z_0h[unstable] = z_0 * np.exp(1.5 - 0.2 * np.log(Re[unstable]) - 0.11 \ * (np.log(Re[unstable]))**2) - - # If n_elements(where(z_0h[unstable] < 1e-6)) > 1 then + + # If n_elements(where(z_0h[unstable] < 1e-6)) > 1 then # z_0h[unstable[where(z_0h[unstable] < 1e-6)]] = 1e-6 z_0h[stable][z_0h[stable] < 1E-6] == 1E-6 th_star = kappa * (theta[unstable] - Tsurf_h[unstable]) \ @@ -1191,7 +1191,7 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, * ( 1 + ((1-eps) / eps) * q_h[unstable]) \ / (g * kappa * th_star * ( 1 + ((1-eps) / eps) * q_star)) L_dif = abs((L_prev-L[unstable])/L_prev) - + # If n_elements(where(L_dif > L_dif_max)) eq 1 then break if np.all(L_dif <= L_dif_max): break @@ -1199,12 +1199,12 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, HF_nan = np.isnan(p_h) | np.isnan(T_h) | np.isnan(Tsurf_h) \ | np.isnan(q_h) | np.isnan(WS_h) | np.isnan(z_T) SHF_h[HF_nan] = np.nan - LHF_h[HF_nan] = np.nan + LHF_h[HF_nan] = np.nan return SHF_h, LHF_h -def calculate_viscosity(T_h, T_0, rho_atm): +def calculate_viscosity(T_h, T_0, rho_atm): '''Calculate kinematic viscosity of air - + Parameters ---------- T_h : xarray.DataArray @@ -1213,7 +1213,7 @@ def calculate_viscosity(T_h, T_0, rho_atm): Steam point temperature rho_atm : xarray.DataArray Surface temperature - + Returns ------- xarray.DataArray @@ -1221,15 +1221,15 @@ def calculate_viscosity(T_h, T_0, rho_atm): ''' # Dynamic viscosity of air in Pa s (Sutherlands' equation using C = 120 K) mu = 18.27e-6 * (291.15 + 120) / ((T_h + T_0) + 120) * ((T_h + T_0) / 291.15)**1.5 - + # Kinematic viscosity of air in m^2/s - return mu / rho_atm + return mu / rho_atm -def calculate_specific_humidity(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, eps=0.622): +def calculate_specific_humidity(T_0, T_100, T_h, p_h, 
rh_h_wrt_ice_or_water, es_0=6.1071, es_100=1013.246, eps=0.622): '''Calculate specific humidity Parameters ---------- - T_0 : float + T_0 : float Steam point temperature. Default is 273.15. T_100 : float Steam point temperature in Kelvin @@ -1237,20 +1237,20 @@ def calculate_specific_humidity(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_ Air temperature p_h : xarray.DataArray Air pressure - RH_cor_h : xarray.DataArray + rh_h_wrt_ice_or_water : xarray.DataArray Relative humidity corrected es_0 : float Saturation vapour pressure at the melting point (hPa) es_100 : float Saturation vapour pressure at steam point temperature (hPa) - eps : int + eps : int ratio of molar masses of vapor and dry air (0.622) - + Returns ------- xarray.DataArray Specific humidity data array - ''' + ''' # Saturation vapour pressure above 0 C (hPa) es_wtr = 10**(-7.90298 * (T_100 / (T_h + T_0) - 1) + 5.02808 * np.log10(T_100 / (T_h + T_0)) - 1.3816E-7 * (10**(11.344 * (1 - (T_h + T_0) / T_100)) - 1) @@ -1260,21 +1260,21 @@ def calculate_specific_humidity(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_ es_ice = 10**(-9.09718 * (T_0 / (T_h + T_0) - 1) - 3.56654 * np.log10(T_0 / (T_h + T_0)) + 0.876793 * (1 - (T_h + T_0) / T_0) - + np.log10(es_0)) + + np.log10(es_0)) # Specific humidity at saturation (incorrect below melting point) - q_sat = eps * es_wtr / (p_h - (1 - eps) * es_wtr) - + q_sat = eps * es_wtr / (p_h - (1 - eps) * es_wtr) + # Replace saturation specific humidity values below melting point - freezing = T_h < 0 + freezing = T_h < 0 q_sat[freezing] = eps * es_ice[freezing] / (p_h[freezing] - (1 - eps) * es_ice[freezing]) - + q_nan = np.isnan(T_h) | np.isnan(p_h) q_sat[q_nan] = np.nan # Convert to kg/kg - return RH_cor_h * q_sat / 100 - -if __name__ == "__main__": - # unittest.main() - pass + return rh_h_wrt_ice_or_water * q_sat / 100 + +if __name__ == "__main__": + # unittest.main() + pass diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 2280901e..4a402169 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -51,7 +51,7 @@ def resample_dataset(ds_h, t): # taking the 10 min data and using it as instantaneous values: is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) if (t == '60min') and is_10_minutes_timestamp.any(): - cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] + cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_wrt_ice_or_water', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index() timestamp_round_hour = df_d.index timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min) @@ -95,8 +95,8 @@ def resample_dataset(ds_h, t): df_d[var] = (p_vap.to_series().resample(t).mean() \ / es_wtr.to_series().resample(t).mean())*100 - if var+'_cor' in df_d.keys(): - df_d[var+'_cor'] = (p_vap.to_series().resample(t).mean() \ + if var+'_wrt_ice_or_water' in df_d.keys(): + df_d[var+'_wrt_ice_or_water'] = (p_vap.to_series().resample(t).mean() \ / es_cor.to_series().resample(t).mean())*100 # passing each variable attribute to the ressample dataset diff --git a/src/pypromice/process/value_clipping.py b/src/pypromice/process/value_clipping.py index f65cad52..dfb26b77 100644 --- a/src/pypromice/process/value_clipping.py +++ b/src/pypromice/process/value_clipping.py @@ -11,8 +11,6 @@ def clip_values( ): """ Clip values in dataset to defined "hi" and "lo" variables from dataframe. 
- There is a special treatment here for rh_u and rh_l variables, where values - are clipped and not assigned to NaN. This is for replication purposes Parameters ---------- diff --git a/src/pypromice/resources/variable_aliases_GC-Net.csv b/src/pypromice/resources/variable_aliases_GC-Net.csv index afac35f7..32a51c1b 100644 --- a/src/pypromice/resources/variable_aliases_GC-Net.csv +++ b/src/pypromice/resources/variable_aliases_GC-Net.csv @@ -5,10 +5,10 @@ p_l, t_u,TA2 t_l,TA1 rh_u,RH2 -rh_u_cor,RH2_cor +rh_u_wrt_ice_or_water,RH2_cor qh_u,Q2 rh_l,RH1 -rh_l_cor,RH1_cor +rh_l_wrt_ice_or_water,RH1_cor qh_l,Q1 wspd_u,VW2 wspd_l,VW1 diff --git a/src/pypromice/resources/variables.csv b/src/pypromice/resources/variables.csv index ec6a3b15..5b3f7714 100644 --- a/src/pypromice/resources/variables.csv +++ b/src/pypromice/resources/variables.csv @@ -6,10 +6,10 @@ p_l,air_pressure,Air pressure (lower boom),hPa,physicalMeasurement,time,FALSE,,6 t_u,air_temperature,Air temperature (upper boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,"",all,1,1,1,4 t_l,air_temperature,Air temperature (lower boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,"",two-boom,1,1,1,4 rh_u,relative_humidity,Relative humidity (upper boom),%,physicalMeasurement,time,FALSE,,0,100,"",all,1,1,1,4 -rh_u_cor,relative_humidity_corrected,Relative humidity (upper boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,"",all,0,1,1,4 +rh_u_wrt_ice_or_water,relative_humidity_with_respect_to_ice_or_water,Relative humidity (upper boom) with respect to saturation over ice in subfreezing conditions and over water otherwise,%,modelResult,time,FALSE,L2 or later,0,150,"",all,0,1,1,4 qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,"",all,0,1,1,4 rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time,FALSE,,0,100,"",two-boom,1,1,1,4 -rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,"",two-boom,0,1,1,4 +rh_l_wrt_ice_or_water,relative_humidity_with_respect_to_ice_or_water,Relative humidity (lower boom) with respect to saturation over ice in subfreezing conditions and over water otherwise,%,modelResult,time,FALSE,L2 or later,0,150,"",two-boom,0,1,1,4 qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,two-boom,0,1,1,4 wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_u wspd_x_u wspd_y_u,all,1,1,1,4 wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_l wspd_x_l wspd_y_l,two-boom,1,1,1,4 @@ -99,7 +99,7 @@ t_rad,temperature_of_radiation_sensor,Radiation sensor temperature,degrees_C,phy p_i,air_pressure,Air pressure (instantaneous) minus 1000,hPa,physicalMeasurement,time,TRUE,,-350,100,,all,1,1,1,4 t_i,air_temperature,Air temperature (instantaneous),degrees_C,physicalMeasurement,time,TRUE,,-80,40,,all,1,1,1,4 rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,time,TRUE,,0,150,"",all,1,1,1,4 -rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4 +rh_i_wrt_ice_or_water,relative_humidity_with_respect_to_ice_or_water,Relative humidity (instantaneous) with respect to saturation over ice in subfreezing conditions and over water otherwise,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4 wspd_i,wind_speed,Wind speed (instantaneous),m 
s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4 wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4 wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,"",all,0,1,1,4 From ede895d95c632802891f6d78159e89f3297ca034 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Fri, 8 Nov 2024 11:41:26 +0100 Subject: [PATCH 03/18] fixing tests --- tests/unit/bufr_export/tx_l3_test1.csv | 2 +- tests/unit/test_value_clippping.py | 34 +++++++++++++------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/unit/bufr_export/tx_l3_test1.csv b/tests/unit/bufr_export/tx_l3_test1.csv index c9aac70c..c36e441d 100755 --- a/tests/unit/bufr_export/tx_l3_test1.csv +++ b/tests/unit/bufr_export/tx_l3_test1.csv @@ -1,4 +1,4 @@ -time,p_u,p_l,t_u,t_l,rh_u,rh_u_cor,qh_u,rh_l,rh_l_cor,qh_l,wspd_u,wspd_l,wdir_u,wdir_l,wspd_x_u,wspd_y_u,wspd_x_l,wspd_y_l,dsr,dsr_cor,usr,usr_cor,albedo,dlr,ulr,cc,t_surf,dlhf_u,dlhf_l,dshf_u,dshf_l,z_boom_u,z_boom_l,precip_u,precip_u_cor,precip_u_rate,precip_l,precip_l_cor,precip_l_rate,t_i_1,t_i_2,t_i_3,t_i_4,t_i_5,t_i_6,t_i_7,t_i_8,t_i_9,t_i_10,t_i_11,tilt_x,tilt_y,rot,gps_lat,gps_lon,gps_alt,gps_time,gps_hdop,batt_v,fan_dc_u,fan_dc_l,t_rad,p_i,t_i,rh_i,rh_i_cor,wspd_i,wdir_i,wspd_x_i,wspd_y_i,msg_i,msg_lat,msg_lon +time,p_u,p_l,t_u,t_l,rh_u,rh_u_wrt_ice_or_water,qh_u,rh_l,rh_l_wrt_ice_or_water,qh_l,wspd_u,wspd_l,wdir_u,wdir_l,wspd_x_u,wspd_y_u,wspd_x_l,wspd_y_l,dsr,dsr_cor,usr,usr_cor,albedo,dlr,ulr,cc,t_surf,dlhf_u,dlhf_l,dshf_u,dshf_l,z_boom_u,z_boom_l,precip_u,precip_u_cor,precip_u_rate,precip_l,precip_l_cor,precip_l_rate,t_i_1,t_i_2,t_i_3,t_i_4,t_i_5,t_i_6,t_i_7,t_i_8,t_i_9,t_i_10,t_i_11,tilt_x,tilt_y,rot,gps_lat,gps_lon,gps_alt,gps_time,gps_hdop,batt_v,fan_dc_u,fan_dc_l,t_rad,p_i,t_i,rh_i,rh_i_cor,wspd_i,wdir_i,wspd_x_i,wspd_y_i,msg_i,msg_lat,msg_lon 2023-12-01 00:00:00,784.5,785.8,-16.32,-16.3,77.87,91.2846,1.0583,80.1,93.8804,1.0887,16.33,15.27,113.2,122.8,15.0095,-6.4331,12.8355,-8.2719,-1.9282,0.0,0.5033,0.0,,182.6197,241.7566,0.3205,-17.134,-1.6006,1.6393,30.3492,31.1479,4.1967,2.946,129.4,136.5924,0.0,304.0,315.0223,0.0,,-8.33,-7.41,-6.43,-5.9,-5.64,-5.51,-6.01,,,-12.46,0.6404,0.8809,218.9,66.482488,-46.29424,2135.0,21.0,0.9,12.72,35.15,11.81,-16.44,-213.9,-16.4,82.0,96.2012,15.48,122.3,13.0847,-8.2718,0.0,66.5026,-46.33518 2023-12-01 01:00:00,784.1,785.5,-15.82,-15.92,76.17,88.8564,1.0798,78.75,91.956,1.1052,15.85,14.83,112.3,126.2,14.6646,-6.0144,11.9672,-8.7587,-2.0466,0.0,0.7549,0.0,,178.4223,242.0001,0.2567,-17.034,-0.5022,2.1859,43.1898,40.0931,4.2047,2.9472,129.4,136.5924,0.0,304.0,315.0223,0.0,,-8.33,-7.41,-6.43,-5.9,-5.64,-5.51,-6.01,,,-12.46,0.631,0.5809,225.6,66.482471,-46.29426,2124.0,10022.0,0.73,12.74,30.55,11.82,-16.06,-214.4,-16.2,79.4,92.9691,15.28,118.8,13.39,-7.3612,0.0,66.5026,-46.33518 2023-12-01 02:00:00,784.1,785.5,-15.55,-15.6,76.88,89.4484,1.1146,79.15,92.1345,1.1408,15.55,14.57,117.1,129.6,13.8428,-7.0837,11.2264,-9.2873,-2.0272,0.0,0.6131,0.0,,182.2089,243.3796,0.2862,-16.6921,-0.5438,2.18,40.0149,38.6836,4.1981,2.9441,129.4,136.5924,0.0,304.0,315.0223,0.0,,-8.33,-7.41,-6.43,-5.9,-5.64,-5.51,-6.01,,,-12.46,0.3333,0.8761,231.1,66.482326,-46.294294,2121.0,20021.0,1.04,12.73,30.8,12.34,-15.74,-214.5,-15.6,79.0,91.9599,14.63,138.5,9.6941,-10.9572,0.0,66.50363,-46.20806 diff --git 
a/tests/unit/test_value_clippping.py b/tests/unit/test_value_clippping.py index 52f33234..8e90655d 100644 --- a/tests/unit/test_value_clippping.py +++ b/tests/unit/test_value_clippping.py @@ -198,32 +198,32 @@ def test_circular_dependencies(self): check_dtype=True, ) - def test_rh_corrected(self): + def test_rh_adjusted(self): variable_config = pd.DataFrame( columns=["field", "lo", "hi", "OOL"], data=[ - ["rh_u", 0, 150, "rh_u_cor"], - ["rh_u_cor", 0, 150, ""], + ["rh_u", 0, 150, "rh_u_wrt_ice_or_water"], + ["rh_u_wrt_ice_or_water", 0, 150, ""], ], ).set_index("field") rows_input = [] rows_expected = [] # All values are within the expected range - rows_input.append(dict(rh_u=42, rh_u_cor=43)) - rows_expected.append(dict(rh_u=42, rh_u_cor=43)) - # rh_u is below range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship - rows_input.append(dict(rh_u=-10, rh_u_cor=3)) - rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan)) - # rh_u is within range, but rh_u_cor is below range; rh_u_cor should be flagged - rows_input.append(dict(rh_u=54, rh_u_cor=-4)) - rows_expected.append(dict(rh_u=54, rh_u_cor=np.nan)) - # rh_u is above range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship - rows_input.append(dict(rh_u=160, rh_u_cor=120)) - rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan)) - # rh_u is within range, but rh_u_cor is above range; rh_u_cor should be flagged - rows_input.append(dict(rh_u=100, rh_u_cor=255)) - rows_expected.append(dict(rh_u=100, rh_u_cor=np.nan)) + rows_input.append(dict(rh_u=42, rh_u_wrt_ice_or_water=43)) + rows_expected.append(dict(rh_u=42, rh_u_wrt_ice_or_water=43)) + # rh_u is below range, but rh_u_wrt_ice_or_water is within range. Both should be flagged due to the OOL relationship + rows_input.append(dict(rh_u=-10, rh_u_wrt_ice_or_water=3)) + rows_expected.append(dict(rh_u=np.nan, rh_u_wrt_ice_or_water=np.nan)) + # rh_u is within range, but rh_u_wrt_ice_or_water is below range; rh_u_wrt_ice_or_water should be flagged + rows_input.append(dict(rh_u=54, rh_u_wrt_ice_or_water=-4)) + rows_expected.append(dict(rh_u=54, rh_u_wrt_ice_or_water=np.nan)) + # rh_u is above range, but rh_u_wrt_ice_or_water is within range. 
Both should be flagged due to the OOL relationship + rows_input.append(dict(rh_u=160, rh_u_wrt_ice_or_water=120)) + rows_expected.append(dict(rh_u=np.nan, rh_u_wrt_ice_or_water=np.nan)) + # rh_u is within range, but rh_u_wrt_ice_or_water is above range; rh_u_wrt_ice_or_water should be flagged + rows_input.append(dict(rh_u=100, rh_u_wrt_ice_or_water=255)) + rows_expected.append(dict(rh_u=100, rh_u_wrt_ice_or_water=np.nan)) # Prepare the data df_input = pd.DataFrame(rows_input, dtype=float) From 3bce759eeab31d130a0fa31b14833beca03d63a4 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Fri, 8 Nov 2024 11:47:20 +0100 Subject: [PATCH 04/18] fixing adjustHumidity --- src/pypromice/process/L1toL2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index a8039017..4c848284 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -462,7 +462,7 @@ def adjustHumidity(rh, T, T_0, T_100, ews, ei0): #TODO fi # Set to Groff & Gratch values when freezing, otherwise just rh rh_wrt_ice_or_water = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice)) - return rh_cor + return rh_wrt_ice_or_water def correctPrecip(precip, wspd): From c39844b238f160f650c6b2f1f7d742d90e14f4a4 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Fri, 8 Nov 2024 12:17:09 +0100 Subject: [PATCH 05/18] fixed resample --- src/pypromice/process/resample.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 4a402169..598e6035 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -52,11 +52,12 @@ def resample_dataset(ds_h, t): is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) if (t == '60min') and is_10_minutes_timestamp.any(): cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_wrt_ice_or_water', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] + cols_origin = ['p_u', 't_u', 'rh_u', 'rh_u_wrt_ice_or_water', 'wspd_u', 'wdir_u','wspd_x_u','wspd_y_u'] timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index() timestamp_round_hour = df_d.index timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min) - for col in cols_to_update: + for col, col_org in zip(cols_to_update, cols_origin): if col not in df_d.columns: df_d[col] = np.nan else: @@ -67,7 +68,7 @@ def resample_dataset(ds_h, t): timestamp_to_update = timestamp_to_update[missing_instantaneous] df_d.loc[timestamp_to_update, col] = ds_h.reindex( time= timestamp_to_update - )[col.replace('_i','_u')].values + )[col_org].values if col == 'p_i': df_d.loc[timestamp_to_update, col] = df_d.loc[timestamp_to_update, col].values-1000 From de45a9cd61bdca908a9fcc9d2e02d4c2f2648c46 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 12 Nov 2024 11:58:33 +0100 Subject: [PATCH 06/18] fixed unintended removal of rh_*_ice_or_water from the daily and monthly dataset --- src/pypromice/process/write.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py index 77e24863..181ed045 100644 --- a/src/pypromice/process/write.py +++ b/src/pypromice/process/write.py @@ -100,14 +100,14 @@ def prepare_and_write( elif t == 86400: # removing instantaneous values from 
daily and monthly files for v in col_names: - if ("_i" in v) and ("_i_" not in v): + if v in ['p_i', 't_i', 'rh_i', 'wspd_i', 'wdir_i', 'wspd_x_i', 'wspd_y_i']: col_names.remove(v) out_csv = output_dir / f"{name}_day.csv" out_nc = output_dir / f"{name}_day.nc" else: # removing instantaneous values from daily and monthly files for v in col_names: - if ("_i" in v) and ("_i_" not in v): + if v in ['p_i', 't_i', 'rh_i', 'wspd_i', 'wdir_i', 'wspd_x_i', 'wspd_y_i']: col_names.remove(v) out_csv = output_dir / f"{name}_month.csv" out_nc = output_dir / f"{name}_month.nc" From 5d312ac71d1b928703a32c6e665b53c444e6033e Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:51:32 +0100 Subject: [PATCH 07/18] Fixed format test --- src/pypromice/process/aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 1d203ad4..e1285fee 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -94,7 +94,7 @@ def __init__( formats = {dataset.attrs["format"].lower() for dataset in self.L0} if "raw" in formats: self.format = "raw" - elif "STM" in formats: + elif "stm" in formats: self.format = "STM" elif "tx" in formats: self.format = "tx" From a77b7257880fc01c9b95fa5de08909d3af1bf622 Mon Sep 17 00:00:00 2001 From: Penny How Date: Tue, 12 Nov 2024 12:27:03 -0200 Subject: [PATCH 08/18] ORO added to gps_lon station exemption (#314) --- src/pypromice/process/write.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py index 181ed045..b1e0abe4 100644 --- a/src/pypromice/process/write.py +++ b/src/pypromice/process/write.py @@ -484,7 +484,7 @@ def reformat_time(dataset): return dataset -def reformat_lon(dataset, exempt=["UWN", "Roof_GEUS", "Roof_PROMICE"]): +def reformat_lon(dataset, exempt=["UWN", "Roof_GEUS", "Roof_PROMICE", "ORO"]): """Switch gps_lon to negative values (degrees_east). We do this here, and NOT in addMeta, otherwise we switch back to positive when calling getMeta in joinL2""" From 125da671a881f25fdec0ff6195693d84a79b47bd Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:58:45 +0100 Subject: [PATCH 09/18] adjusting altitude filter --- src/pypromice/process/L1toL2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 4c848284..aa986b7a 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -92,7 +92,7 @@ def toL2( baseline_elevation = (ds.gps_alt.to_series().resample('MS').median() .reindex(ds.time.to_series().index, method='nearest') .ffill().bfill()) - mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull() + mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) | ds.gps_alt.isnull() ds[['gps_alt','gps_lon', 'gps_lat']] = ds[['gps_alt','gps_lon', 'gps_lat']].where(mask) # removing dlr and ulr that are missing t_rad @@ -421,9 +421,9 @@ def calcTilt(tilt_x, tilt_y, deg2rad): def adjustHumidity(rh, T, T_0, T_100, ews, ei0): #TODO figure out if T replicate is needed '''Adjust relative humidity so that values are given with respect to - saturation over ice in subfreezing conditions, and with respect to - saturation over water (as given by the instrument) above the melting - point temperature. 
Saturation water vapors are calculated after + saturation over ice in subfreezing conditions, and with respect to + saturation over water (as given by the instrument) above the melting + point temperature. Saturation water vapors are calculated after Groff & Gratch method. Parameters From 087e48555e970b1f151f6f1d39feee1c5e79af4c Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:08:25 +0100 Subject: [PATCH 10/18] version number update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 307d9242..fa72edd3 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="pypromice", - version="1.4.4", + version="1.5.0", author="GEUS Glaciology and Climate", description="PROMICE/GC-Net data processing toolbox", long_description=long_description, From e87a363708be2b432874e5b75a092bafe9701409 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:09:02 +0100 Subject: [PATCH 11/18] Update conf.py --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 781076ac..3cd9ae9c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'GEUS Glaciology and Climate' # The full version, including alpha/beta/rc tags -release = '1.4.4' +release = '1.5.0' # -- General configuration ---------------------------------------------------- From 0a9fb56edbb2521080238ac07dd197ad130eedeb Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 11 Sep 2024 08:47:52 +0200 Subject: [PATCH 12/18] adding compression --- src/pypromice/process/write.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py index b1e0abe4..f3a4ce1a 100644 --- a/src/pypromice/process/write.py +++ b/src/pypromice/process/write.py @@ -171,7 +171,7 @@ def writeCSV(outfile, Lx, csv_order): def writeNC(outfile, Lx, col_names=None): - """Write data product to NetCDF file + """Write data product to NetCDF file with compression Parameters ---------- @@ -187,7 +187,10 @@ def writeNC(outfile, Lx, col_names=None): else: names = list(Lx.keys()) - Lx[names].to_netcdf(outfile, mode="w", format="NETCDF4", compute=True) + comp = dict(zlib=True, complevel=4) + encoding = {var: comp for var in names} + + Lx[names].to_netcdf(outfile, mode="w", format="NETCDF4", compute=True, encoding=encoding) def getColNames(vars_df, ds, remove_nan_fields=False): From 13b7e51c12974fdd460de6c17972efb0352f5f27 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 11 Sep 2024 08:59:07 +0200 Subject: [PATCH 13/18] increasing the time constraint on the reading of the dataset Due to compression, the time required to read both hourly and 10 min data (tested at this line) increases to 1.3 s. Now putting it to 5 s max before an error is raised.
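A minimal standalone sketch of the per-variable zlib encoding that PATCH 12 wires into writeNC, and which motivates the relaxed read-time limit in PATCH 13. The dataset, variable names and output file below are made up for illustration, and it assumes xarray with the netCDF4 backend installed:

import numpy as np
import xarray as xr

# Illustrative dataset standing in for an AWS data product; not pypromice data.
ds = xr.Dataset(
    {
        "t_u": ("time", np.random.rand(1000)),
        "p_u": ("time", np.random.rand(1000)),
    },
    coords={"time": np.arange(1000)},
)

# Same settings as the writeNC change: zlib (deflate) at compression level 4,
# applied to every written variable through the encoding mapping.
comp = dict(zlib=True, complevel=4)
encoding = {var: comp for var in ds.data_vars}

# Compressed files are smaller but slower to open, which is why the commit
# above relaxes the e2e read-time limit from 1 s to 5 s.
ds.to_netcdf("example_compressed.nc", mode="w", format="NETCDF4", encoding=encoding)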
--- tests/e2e/test_get_l2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_get_l2.py b/tests/e2e/test_get_l2.py index 2796358f..7926f689 100644 --- a/tests/e2e/test_get_l2.py +++ b/tests/e2e/test_get_l2.py @@ -96,7 +96,7 @@ def test_get_l2_raw(self): self.assertEqual(dataset.attrs["station_id"], station_id) self.assertIsInstance(dataset.attrs["date_created"], str) date_created = pd.to_datetime(dataset.attrs["date_created"]) - self.assertLess(t0 - date_created, datetime.timedelta(seconds=1)) + self.assertLess(t0 - date_created, datetime.timedelta(seconds=5)) self.assertEqual( dataset.attrs["date_issued"], dataset.attrs["date_created"] ) From 55e5cbac86efac109772df402e75b1afc0a279d5 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Thu, 21 Nov 2024 13:49:28 +0100 Subject: [PATCH 14/18] Added NetCDF zip compr to joined l3 products * Made the NetCDF zip compression optional * Only using compression on the final joined_l3 output --- src/pypromice/process/join_l3.py | 6 ++-- src/pypromice/process/write.py | 49 ++++++++++---------------------- 2 files changed, 18 insertions(+), 37 deletions(-) diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index b550a8c1..c5264d19 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -538,9 +538,9 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me v = pypromice.resources.load_variables(variables) m = pypromice.resources.load_metadata(metadata) if outpath is not None: - prepare_and_write(l3_merged, outpath, v, m, "60min") - prepare_and_write(l3_merged, outpath, v, m, "1D") - prepare_and_write(l3_merged, outpath, v, m, "M") +$ prepare_and_write(l3_merged, outpath, v, m, "60min", nc_compression=True) + prepare_and_write(l3_merged, outpath, v, m, "1D", nc_compression=True) + prepare_and_write(l3_merged, outpath, v, m, "M", nc_compression=True) return l3_merged, sorted_list_station_data diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py index f3a4ce1a..e89d6a23 100644 --- a/src/pypromice/process/write.py +++ b/src/pypromice/process/write.py @@ -17,7 +17,13 @@ def prepare_and_write( - dataset, output_path: Path | str, vars_df=None, meta_dict=None, time="60min", resample=True + dataset, + output_path: Path | str, + vars_df=None, + meta_dict=None, + time="60min", + resample=True, + nc_compression:bool=False, ): """Prepare data with resampling, formating and metadata population; then write data to .nc and .csv hourly and daily files @@ -117,40 +123,11 @@ def prepare_and_write( writeCSV(out_csv, d2, col_names) # Write to netcdf file - writeNC(out_nc, d2, col_names) + writeNC(out_nc, d2, col_names, compression=nc_compression) logger.info(f"Written to {out_csv}") logger.info(f"Written to {out_nc}") -def writeAll(outpath, station_id, l3_h, l3_d, l3_m, csv_order=None): - """Write L3 hourly, daily and monthly datasets to .nc and .csv - files - - Parameters - ---------- - outpath : str - Output file path - station_id : str - Station name - l3_h : xr.Dataset - L3 hourly data - l3_d : xr.Dataset - L3 daily data - l3_m : xr.Dataset - L3 monthly data - csv_order : list, optional - List order of variables - """ - if not os.path.isdir(outpath): - os.mkdir(outpath) - outfile_h = os.path.join(outpath, station_id + "_hour") - outfile_d = os.path.join(outpath, station_id + "_day") - outfile_m = os.path.join(outpath, station_id + "_month") - for o, l in zip([outfile_h, outfile_d, outfile_m], [l3_h, l3_d, l3_m]): - 
writeCSV(o + ".csv", l, csv_order) - writeNC(o + ".nc", l) - - def writeCSV(outfile, Lx, csv_order): """Write data product to CSV file @@ -170,7 +147,7 @@ def writeCSV(outfile, Lx, csv_order): Lcsv.to_csv(outfile) -def writeNC(outfile, Lx, col_names=None): +def writeNC(outfile, Lx, col_names=None, compression=False): """Write data product to NetCDF file with compression Parameters @@ -187,8 +164,12 @@ def writeNC(outfile, Lx, col_names=None): else: names = list(Lx.keys()) - comp = dict(zlib=True, complevel=4) - encoding = {var: comp for var in names} + encoding = {var: dict() for var in names} + + if compression: + comp = dict(zlib=True, complevel=4) + for var in names: + encoding[var].update(comp) Lx[names].to_netcdf(outfile, mode="w", format="NETCDF4", compute=True, encoding=encoding) From 8a22757b0539123f45d4752fa785b05000f6da18 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Thu, 21 Nov 2024 13:50:16 +0100 Subject: [PATCH 15/18] Removed unused write methods from the AWS class I couldn't see any usages of writeArr, writeL2 and writeL3 in the repository. --- src/pypromice/process/aws.py | 38 ------------------------------------ 1 file changed, 38 deletions(-) diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index e1285fee..2e7c6976 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -123,22 +123,6 @@ def process(self): self.getL2() self.getL3() - def writeL2(self, outpath): - """Write L2 data to .csv and .nc file""" - if os.path.isdir(outpath): - self.writeArr(self.L2, outpath) - else: - logger.info(f"Outpath f{outpath} does not exist. Unable to save to file") - pass - - def writeL3(self, outpath): - """Write L3 data to .csv and .nc file""" - if os.path.isdir(outpath): - self.writeArr(self.L3, outpath) - else: - logger.info(f"Outpath f{outpath} does not exist. Unable to save to file") - pass - def getL1(self): """Perform L0 to L1 data processing""" logger.info("Level 1 processing...") @@ -164,28 +148,6 @@ def getL3(self): logger.info("Level 3 processing...") self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments") - def writeArr(self, dataset, outpath, t=None): - """Write L3 data to .nc and .csv hourly and daily files - - Parameters - ---------- - dataset : xarray.Dataset - Dataset to write to file - outpath : str - Output directory - t : str - Resampling string. This is automatically defined based - on the data type if not given. The default is None. 
- """ - if t is not None: - write.prepare_and_write(dataset, outpath, self.vars, self.meta, t) - else: - f = [l.attrs["format"] for l in self.L0] - if "raw" in f or "STM" in f: - write.prepare_and_write(dataset, outpath, self.vars, self.meta, "10min") - else: - write.prepare_and_write(dataset, outpath, self.vars, self.meta, "60min") - def loadConfig(self, config_file, inpath): """Load configuration from .toml file From 53653551e95d4f2d35a33ba5858915eb68d67325 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Thu, 21 Nov 2024 13:52:15 +0100 Subject: [PATCH 16/18] fixup join_l3 --- src/pypromice/process/join_l3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index c5264d19..abfbe97a 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -538,7 +538,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me v = pypromice.resources.load_variables(variables) m = pypromice.resources.load_metadata(metadata) if outpath is not None: -$ prepare_and_write(l3_merged, outpath, v, m, "60min", nc_compression=True) + prepare_and_write(l3_merged, outpath, v, m, "60min", nc_compression=True) prepare_and_write(l3_merged, outpath, v, m, "1D", nc_compression=True) prepare_and_write(l3_merged, outpath, v, m, "M", nc_compression=True) return l3_merged, sorted_list_station_data From 595c1493835d877140b274443a52d97ce9e582b8 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Thu, 21 Nov 2024 13:53:51 +0100 Subject: [PATCH 17/18] Reverted time constrain from test --- tests/e2e/test_get_l2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_get_l2.py b/tests/e2e/test_get_l2.py index 7926f689..2796358f 100644 --- a/tests/e2e/test_get_l2.py +++ b/tests/e2e/test_get_l2.py @@ -96,7 +96,7 @@ def test_get_l2_raw(self): self.assertEqual(dataset.attrs["station_id"], station_id) self.assertIsInstance(dataset.attrs["date_created"], str) date_created = pd.to_datetime(dataset.attrs["date_created"]) - self.assertLess(t0 - date_created, datetime.timedelta(seconds=5)) + self.assertLess(t0 - date_created, datetime.timedelta(seconds=1)) self.assertEqual( dataset.attrs["date_issued"], dataset.attrs["date_created"] ) From 89665a5919995b979f09f0714342cb5d75d42535 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Thu, 21 Nov 2024 14:18:01 +0100 Subject: [PATCH 18/18] Updated e2e test to teste zlib compression --- tests/e2e/test_process.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/e2e/test_process.py b/tests/e2e/test_process.py index 2fd2d16c..ca684b41 100644 --- a/tests/e2e/test_process.py +++ b/tests/e2e/test_process.py @@ -207,6 +207,30 @@ def test_full_e2e(self): output_dataset = xr.load_dataset(output_path) self.check_global_attributes(output_dataset, output_rel_path) + # Check if the l3 datasets are compressed + if output_path.parent.parent.name == 'site_l3': + self.assertEqual(output_dataset['p_u'].encoding["zlib"], True, output_rel_path) + else: + self.assertEqual(output_dataset['p_u'].encoding["zlib"], False, output_rel_path) + + # Test if the l3 output netcdf files are compressed with zlib + for output_rel_path in [ + "station_l3/TEST1/TEST1_day.nc", + "station_l3/TEST1/TEST1_hour.nc", + "station_l3/TEST1/TEST1_month.nc", + "site_l3/SITE_01/SITE_01_day.nc", + "site_l3/SITE_01/SITE_01_hour.nc", + "site_l3/SITE_01/SITE_01_month.nc", + ]: + output_path = root / output_rel_path + output_dataset = 
xr.load_dataset(output_path) + for var in output_dataset.variables: + # %% + print(var, output_dataset[var].encoding) + continue + self.assertEqual(output_dataset[var].encoding["zlib"], True) + + def check_global_attributes(self, dataset: xr.Dataset, reference: str): attribute_keys = set(dataset.attrs.keys()) highly_recommended_global_attributes = {