From 77004aba4b4ef764dc19d01c31d10dd7eed98e01 Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Thu, 27 Jun 2024 10:18:37 +0200
Subject: [PATCH 01/14] implemented GPS postprocessing on top of #262

This update:
- cleans up the SHF/LHF calculation
- finds breaks in the GPS coordinates (indicative of station relocation)
- for each interval between breaks, smooths, interpolates and extrapolates
  the GPS coordinates

new required package: statsmodels

---
 environment.yml                       |   1 +
 setup.py                              |   2 +-
 src/pypromice/process/L2toL3.py       | 250 ++++++++++++++++----------
 src/pypromice/resources/variables.csv | 180 +++++++++----------
 4 files changed, 243 insertions(+), 190 deletions(-)

diff --git a/environment.yml b/environment.yml
index b982311f..f2717aa9 100644
--- a/environment.yml
+++ b/environment.yml
@@ -72,6 +72,7 @@ dependencies:
   - setuptools=68.2.2=py38h06a4308_0
   - six=1.16.0=pyh6c4a22f_0
   - sqlite=3.41.2=h5eee18b_0
+  - statsmodel=0.13.2=py39h2bbff1b_0
   - tbb=2021.8.0=hdb19cb5_0
   - threadpoolctl=3.4.0=pyhc1e730c_0
   - tk=8.6.12=h1ccaba5_0
diff --git a/setup.py b/setup.py
index 52a9b216..fb24b5f1 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@
         "pypromice.qc.percentiles": ["thresholds.csv"],
         "pypromice.postprocess": ["station_configurations.toml", "positions_seed.csv"],
     },
-    install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0'],
+    install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0', 'statsmodel==0.13.2'],
     # extras_require={'postprocess': ['eccodes','scikit-learn>=1.1.0']},
     entry_points={
     'console_scripts': [
diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py
index 2e05ff04..0db1dffb 100755
--- a/src/pypromice/process/L2toL3.py
+++ b/src/pypromice/process/L2toL3.py
@@ -2,14 +2,18 @@
 """
 AWS Level 2 (L2) to Level 3 (L3) data processing
 """
+import pandas as pd
 import numpy as np
 import xarray as xr
+from statsmodels.nonparametric.smoothers_lowess import lowess
+import logging
+logger = logging.getLogger(__name__)
 
-def toL3(L2, T_0=273.15, z_0=0.001, R_d=287.05, eps=0.622, es_0=6.1071,
-         es_100=1013.246):
+def toL3(L2, T_0=273.15):
     '''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating
     all derived variables:
-        - Sensible fluxes
+        - Turbulent fluxes
+        - smoothed and inter/extrapolated GPS coordinates
 
 
     Parameters
     ----------
     L2 : xarray:Dataset
         L2 AWS data
     T_0 : int
         Steam point temperature. Default is 273.15.
-    z_0 : int
-        Aerodynamic surface roughness length for momention, assumed constant
-        for all ice/snow surfaces. Default is 0.001.
-    R_d : int
-        Gas constant of dry air. Default is 287.05.
-    eps : int
-        Default is 0.622.
-    es_0 : int
-        Saturation vapour pressure at the melting point (hPa). Default is 6.1071.
-    es_100 : int
-        Saturation vapour pressure at steam point temperature (hPa). Default is
-        1013.246.
''' ds = L2 ds.attrs['level'] = 'L3' T_100 = _getTempK(T_0) # Get steam point temperature as K + # Turbulent heat flux calculation # Upper boom bulk calculation T_h_u = ds['t_u'].copy() # Copy for processing p_h_u = ds['p_u'].copy() @@ -45,54 +38,91 @@ def toL3(L2, T_0=273.15, z_0=0.001, R_d=287.05, eps=0.622, es_0=6.1071, z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer - rho_atm_u = 100 * p_h_u / R_d / (T_h_u + T_0) # Calculate atmospheric density - nu_u = calcVisc(T_h_u, T_0, rho_atm_u) # Calculate kinematic viscosity - q_h_u = calcHumid(T_0, T_100, T_h_u, es_0, es_100, eps, # Calculate specific humidity - p_h_u, RH_cor_h_u) + q_h_u = calcSpHumid(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u) # Calculate specific humidity + if not ds.attrs['bedrock']: - SHF_h_u, LHF_h_u= calcHeatFlux(T_0, T_h_u, Tsurf_h, rho_atm_u, WS_h_u, # Calculate latent and sensible heat fluxes - z_WS_u, z_T_u, nu_u, q_h_u, p_h_u) - SHF_h_u, LHF_h_u = cleanHeatFlux(SHF_h_u, LHF_h_u, T_h_u, Tsurf_h, p_h_u, # Clean heat flux values - WS_h_u, RH_cor_h_u, ds['z_boom_u']) + SHF_h_u, LHF_h_u= calcHeatFlux(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes + z_WS_u, z_T_u, q_h_u, p_h_u) + ds['dshf_u'] = (('time'), SHF_h_u.data) ds['dlhf_u'] = (('time'), LHF_h_u.data) + q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg - q_h_u = cleanSpHumid(q_h_u, T_h_u, Tsurf_h, p_h_u, RH_cor_h_u) # Clean sp.humid values ds['qh_u'] = (('time'), q_h_u.data) # Lower boom bulk calculation - if ds.attrs['number_of_booms']==2: - # ds['wdir_l'] = _calcWindDir(ds['wspd_x_l'], ds['wspd_y_l']) # Calculatate wind direction - + if ds.attrs['number_of_booms']==2: T_h_l = ds['t_l'].copy() # Copy for processing p_h_l = ds['p_l'].copy() WS_h_l = ds['wspd_l'].copy() RH_cor_h_l = ds['rh_l_cor'].copy() z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer - - rho_atm_l = 100 * p_h_l / R_d / (T_h_l + T_0) # Calculate atmospheric density - nu_l = calcVisc(T_h_l, T_0, rho_atm_l) # Calculate kinematic viscosity - q_h_l = calcHumid(T_0, T_100, T_h_l, es_0, es_100, eps, # Calculate sp.humidity - p_h_l, RH_cor_h_l) + + q_h_l = calcSpHumid(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l) # Calculate sp.humidity + if not ds.attrs['bedrock']: - SHF_h_l, LHF_h_l= calcHeatFlux(T_0, T_h_l, Tsurf_h, rho_atm_l, WS_h_l, # Calculate latent and sensible heat fluxes - z_WS_l, z_T_l, nu_l, q_h_l, p_h_l) - SHF_h_l, LHF_h_l = cleanHeatFlux(SHF_h_l, LHF_h_l, T_h_l, Tsurf_h, p_h_l, # Clean heat flux values - WS_h_l, RH_cor_h_l, ds['z_boom_l']) + SHF_h_l, LHF_h_l= calcHeatFlux(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes + z_WS_l, z_T_l, q_h_l, p_h_l) + ds['dshf_l'] = (('time'), SHF_h_l.data) ds['dlhf_l'] = (('time'), LHF_h_l.data) q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg - q_h_l = cleanSpHumid(q_h_l, T_h_l, Tsurf_h, p_h_l, RH_cor_h_l) # Clean sp.humid values + ds['qh_l'] = (('time'), q_h_l.data) + + # Smoothing and inter/extrapolation of GPS coordinates + for var in ['gps_lat', 'gps_lon', 'gps_alt']: + ds[var.replace('gps_','')] = gpsCoordinatePostprocessing(ds, var) + # adding average coordinate as attribute + for v in ['lat','lon','alt']: + ds.attrs[v+'_avg'] = ds[v].mean(dim='time').item() return ds +def gpsCoordinatePostprocessing(ds, var): + # saving the static value of 'lat','lon' or 'alt' stored in attribute + # as it might be the only coordinate available for certain 
stations (e.g. bedrock)
+    var_out = var.replace('gps_','')
+
+    if var_out == 'alt':
+        if 'altitude' in list(ds.attrs.keys()):
+            static_value = float(ds.attrs['altitude'])
+        else:
+            print('no standard altitude for', ds.attrs['station_id'])
+            static_value = np.nan
+    elif var_out == 'lat':
+        static_value = float(ds.attrs['latitude'])
+    elif var_out == 'lon':
+        static_value = float(ds.attrs['longitude'])
+
+    # if there are no GPS observations, then we use the static value repeated
+    # for each timestamp
+    if var not in ds.data_vars:
+        print('no',var,'at', ds.attrs['station_id'])
+        return ('time', np.ones_like(ds['t_u'].data)*static_value)
+
+    if ds[var].isnull().all():
+        print('no',var,'at',ds.attrs['station_id'])
+        return ('time', np.ones_like(ds['t_u'].data)*static_value)
+
+    # here we detect potential relocation of the station in the form of a
+    # break in the general trend of the latitude, longitude and altitude
+    # in the future, this could/should be listed in an external file to
+    # avoid missed relocations or sensor issues interpreted as a relocation
+    if var == 'gps_alt':
+        _, breaks = find_breaks(ds[var].to_series(), alpha=8)
+    else:
+        _, breaks = find_breaks(ds[var].to_series(), alpha=6)
+
+    # smoothing and inter/extrapolation of the coordinate
+    return ('time', piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks))
+
 
-def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h,
+def calcHeatFlux(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h,
                 kappa=0.4, WS_lim=1., z_0=0.001, g=9.82, es_0=6.1071, eps=0.622,
                 gamma=16., L_sub=2.83e6, L_dif_max=0.01, c_pd=1005., aa=0.7,
-                bb=0.75, cc=5., dd=0.35):
+                bb=0.75, cc=5., dd=0.35, R_d=287.05):
     '''Calculate latent and sensible heat flux using the bulk calculation
     method
 
@@ -128,9 +158,14 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h,
     g : int
         Gravitational acceleration (m/s2). Default is 9.82.
     es_0 : int
-        Saturation vapour pressure at the melting point (hPa). Default is 6.1071.
+        Saturation vapour pressure at the melting point (hPa). Default is 6.1071.
+    es_100 : int
+        Saturation vapour pressure at steam point temperature (hPa). Default is
+        1013.246.
     eps : int
         Ratio of molar masses of vapor and dry air (0.622).
+    R_d : int
+        Gas constant of dry air. Default is 287.05.
     gamma : int
         Flux profile correction (Paulson & Dyer). Default is 16..
     L_sub : int
@@ -151,6 +186,9 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h,
     dd : int
         Flux profile correction constants (Holtslag & De Bruin '88). Default is
         0.35.
+    z_0 : int
+        Aerodynamic surface roughness length for momentum, assumed constant
+        for all ice/snow surfaces. Default is 0.001.
Returns ------- @@ -159,6 +197,9 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h, LHF_h : xarray.DataArray Latent heat flux ''' + rho_atm = 100 * p_h / R_d / (T_h + T_0) # Calculate atmospheric density + nu = calcVisc(T_h, T_0, rho_atm) # Calculate kinematic viscosity + SHF_h = xr.zeros_like(T_h) # Create empty xarrays LHF_h = xr.zeros_like(T_h) L = xr.full_like(T_h, 1E5) @@ -244,7 +285,11 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h, # If n_elements(where(L_dif > L_dif_max)) eq 1 then break if np.all(L_dif <= L_dif_max): break - + + HF_nan = np.isnan(p_h) | np.isnan(T_h) | np.isnan(Tsurf_h) \ + | np.isnan(q_h) | np.isnan(WS_h) | np.isnan(z_T) + SHF_h[HF_nan] = np.nan + LHF_h[HF_nan] = np.nan return SHF_h, LHF_h def calcVisc(T_h, T_0, rho_atm): @@ -270,9 +315,8 @@ def calcVisc(T_h, T_0, rho_atm): # Kinematic viscosity of air in m^2/s return mu / rho_atm -def calcHumid(T_0, T_100, T_h, es_0, es_100, eps, p_h, RH_cor_h): +def calcSpHumid(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, eps=0.622): '''Calculate specific humidity - Parameters ---------- T_0 : float @@ -315,72 +359,80 @@ def calcHumid(T_0, T_100, T_h, es_0, es_100, eps, p_h, RH_cor_h): freezing = T_h < 0 q_sat[freezing] = eps * es_ice[freezing] / (p_h[freezing] - (1 - eps) * es_ice[freezing]) + q_nan = np.isnan(T_h) | np.isnan(p_h) + q_sat[q_nan] = np.nan + # Convert to kg/kg return RH_cor_h * q_sat / 100 -def cleanHeatFlux(SHF, LHF, T, Tsurf, p, WS, RH_cor, z_boom): - '''Find invalid heat flux data values and replace with NaNs, based on - air temperature, surface temperature, air pressure, wind speed, - corrected relative humidity, and boom height + +def find_breaks(df,alpha): + '''Detects potential relocation of the station from the GPS measurements. + The code first makes a forward linear interpolation of the coordinates and + then looks for important jumps in latitude, longitude and altitude. The jumps + that are higher than a given threshold (expressed as a multiple of the + standard deviation) are mostly caused by the station being moved during + maintenance. To avoid misclassification, only the jumps detected in May-Sept. + are kept. 
Parameters ---------- - SHF : xarray.DataArray - Sensible heat flux - LHF : xarray.DataArray - Latent heat flux - T : xarray.DataArray - Air temperature - Tsurf : xarray.DataArray - Surface temperature - p : xarray.DataArray - Air pressure - WS : xarray.DataArray - Wind speed - RH_cor : xarray.DataArray - Relative humidity corrected - z_boom : xarray.DataArray - Boom height - - Returns - ------- - SHF : xarray.DataArray - Sensible heat flux corrected - LHF : xarray.DataArray - Latent heat flux corrected + df : pandas.Series + series of observed latitude, longitude or elevation + alpha: float + coefficient to be applied to the the standard deviation of the daily + coordinate fluctuation ''' - HF_nan = np.isnan(p) | np.isnan(T) | np.isnan(Tsurf) \ - | np.isnan(RH_cor) | np.isnan(WS) | np.isnan(z_boom) - SHF[HF_nan] = np.nan - LHF[HF_nan] = np.nan - return SHF, LHF - -def cleanSpHumid(q_h, T, Tsurf, p, RH_cor): - '''Find invalid specific humidity data values and replace with NaNs, - based on air temperature, surface temperature, air pressure, - and corrected relative humidity + diff = df.resample('D').median().interpolate( + method='linear', limit_area='inside', limit_direction='forward').diff() + thresh = diff.std() * alpha + list_diff = diff.loc[diff.abs()>thresh].reset_index() + list_diff = list_diff.loc[list_diff.time.dt.month.isin([5,6,7,8,9])] + list_diff['year']=list_diff.time.dt.year + list_diff=list_diff.groupby('year').max() + return diff, [None]+list_diff.time.to_list()+[None] + + +def piecewise_smoothing_and_interpolation(df_in, breaks): + '''Smoothes, inter- or extrapolate the gps observations. The processing is + done piecewise so that each period between station relocation are done + separately (no smoothing of the jump due to relocation). Locally Weighted + Scatterplot Smoothing (lowess) is then used to smooth the available + observations. Then this smoothed curve is interpolated linearly over internal + gaps. Eventually, this interpolated curve is extrapolated linearly for + timestamps before the first valid measurement and after the last valid + measurement. Parameters ---------- - q_h : xarray.DataArray - Specific humidity - T : xarray.DataArray - Air temperature - Tsurf : xarray.DataArray - Surface temperature - p : xarray.DataArray - Air pressure - RH_cor : xarray.DataArray - Relative humidity corrected - - Returns - ------- - q_h : xarray.DataArray - Specific humidity corrected''' - q_nan = np.isnan(T) | np.isnan(RH_cor) | np.isnan(p) | np.isnan(Tsurf) - q_h[q_nan] = np.nan - return q_h - + df_in : pandas.Series + series of observed latitude, longitude or elevation + breaks: list + List of timestamps of station relocation. 
First and last item should be + None so that they can be used in slice(breaks[i], breaks[i+1]) + ''' + df_all = pd.Series() # dataframe gathering all the smoothed pieces + for i in range(len(breaks)-1): + df = df_in.loc[slice(breaks[i], breaks[i+1])].copy() + + y_sm = lowess(df, + pd.to_numeric(df.index), + is_sorted=True, frac=1/3, it=0, + ) + df.loc[df.notnull()] = y_sm[:,1] + df = df.interpolate(method='linear', limit_area='inside') + + last_valid_6_months = slice(df.last_valid_index()-pd.to_timedelta('180D'),None) + df.loc[last_valid_6_months] = (df.loc[last_valid_6_months].interpolate( axis=0, + method='spline',order=1, limit_direction='forward', fill_value="extrapolate")).values + + first_valid_6_months = slice(None, df.first_valid_index()+pd.to_timedelta('180D')) + df.loc[first_valid_6_months] = (df.loc[first_valid_6_months].interpolate( axis=0, + method='spline',order=1, limit_direction='backward', fill_value="extrapolate")).values + df_all=pd.concat((df_all, df)) + + df_all = df_all[~df_all.index.duplicated(keep='first')] + return df_all.values def _calcAtmosDens(p_h, R_d, T_h, T_0): # TODO: check this shouldn't be in this step somewhere '''Calculate atmospheric density''' diff --git a/src/pypromice/resources/variables.csv b/src/pypromice/resources/variables.csv index ab8e4317..77eaa7b8 100644 --- a/src/pypromice/resources/variables.csv +++ b/src/pypromice/resources/variables.csv @@ -1,91 +1,91 @@ field,standard_name,long_name,units,coverage_content_type,coordinates,instantaneous_hourly,where_to_find,lo,hi,OOL,station_type,L0,L2,L3,max_decimals -time,time,Time,yyyy-mm-dd HH:MM:SS,physicalMeasurement,time lat lon alt,,,,,,all,1,1,1, -rec,record,Record,-,referenceInformation,time lat lon alt,,L0 or L2,,,,all,1,1,0,0 -p_u,air_pressure,Air pressure (upper boom),hPa,physicalMeasurement,time lat lon alt,FALSE,,650,1100,z_pt z_pt_cor dshf_u dlhf_u qh_u,all,1,1,1,4 -p_l,air_pressure,Air pressure (lower boom),hPa,physicalMeasurement,time lat lon alt,FALSE,,650,1100,dshf_l dlhf_l qh_l,two-boom,1,1,1,4 -t_u,air_temperature,Air temperature (upper boom),degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,40,rh_u_cor cc dsr_cor usr_cor z_boom z_stake dshf_u dlhf_u qh_u,all,1,1,1,4 -t_l,air_temperature,Air temperature (lower boom),degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,40,rh_l_cor z_boom_l dshf_l dlhf_l qh_l,two-boom,1,1,1,4 -rh_u,relative_humidity,Relative humidity (upper boom),%,physicalMeasurement,time lat lon alt,FALSE,,0,100,rh_u_cor,all,1,1,1,4 -rh_u_cor,relative_humidity_corrected,Relative humidity (upper boom) - corrected,%,modelResult,time lat lon alt,FALSE,L2 or later,0,150,dshf_u dlhf_u qh_u,all,0,1,1,4 -qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time lat lon alt,FALSE,L2 or later,0,100,,all,0,1,1,4 -rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time lat lon alt,FALSE,,0,100,rh_l_cor,two-boom,1,1,1,4 -rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time lat lon alt,FALSE,L2 or later,0,150,dshf_l dlhf_l qh_l,two-boom,0,1,1,4 -qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,modelResult,time lat lon alt,FALSE,L2 or later,0,100,,two-boom,0,1,1,4 -wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time lat lon alt,FALSE,,0,100,"wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u, precip_u",all,1,1,1,4 -wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time lat lon alt,FALSE,,0,100,"wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l 
qh_l , precip_l",two-boom,1,1,1,4 -wdir_u,wind_from_direction,Wind from direction (upper boom),degrees,physicalMeasurement,time lat lon alt,FALSE,,1,360,wspd_x_u wspd_y_u,all,1,1,1,4 -wdir_std_u,wind_from_direction_standard_deviation,Wind from direction (standard deviation),degrees,qualityInformation,time lat lon alt,FALSE,L0 or L2,,,,one-boom,1,1,0,4 -wdir_l,wind_from_direction,Wind from direction (lower boom),degrees,physicalMeasurement,time lat lon alt,FALSE,,1,360,wspd_x_l wspd_y_l,two-boom,1,1,1,4 -wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m s-1,modelResult,time lat lon alt,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4 -wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,modelResult,time lat lon alt,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4 -wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,modelResult,time lat lon alt,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4 -wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,modelResult,time lat lon alt,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4 -dsr,surface_downwelling_shortwave_flux,Downwelling shortwave radiation,W m-2,physicalMeasurement,time lat lon alt,FALSE,,-10,1500,albedo dsr_cor usr_cor,all,1,1,1,4 -dsr_cor,surface_downwelling_shortwave_flux_corrected,Downwelling shortwave radiation - corrected,W m-2,modelResult,time lat lon alt,FALSE,L2 or later,,,,all,0,1,1,4 -usr,surface_upwelling_shortwave_flux,Upwelling shortwave radiation,W m-2,physicalMeasurement,time lat lon alt,FALSE,,-10,1000,albedo dsr_cor usr_cor,all,1,1,1,4 -usr_cor,surface_upwelling_shortwave_flux_corrected,Upwelling shortwave radiation - corrected,W m-2,modelResult,time lat lon alt,FALSE,L2 or later,0,1000,,all,0,1,1,4 -albedo,surface_albedo,Albedo,-,modelResult,time lat lon alt,FALSE,L2 or later,,,,all,0,1,1,4 -dlr,surface_downwelling_longwave_flux,Downwelling longwave radiation,W m-2,physicalMeasurement,time lat lon alt,FALSE,,50,500,albedo dsr_cor usr_cor cc t_surf,all,1,1,1,4 -ulr,surface_upwelling_longwave_flux,Upwelling longwave radiation,W m-2,physicalMeasurement,time lat lon alt,FALSE,,50,500,t_surf,all,1,1,1,4 -cc,cloud_area_fraction,Cloud cover,%,modelResult,time lat lon alt,FALSE,L2 or later,,,,all,0,1,1,4 -t_surf,surface_temperature,Surface temperature,C,modelResult,time lat lon alt,FALSE,L2 or later,-80,40,dshf_u dlhf_u qh_u,all,0,1,1,4 -dlhf_u,surface_downward_latent_heat_flux,Latent heat flux (upper boom),W m-2,modelResult,time lat lon alt,FALSE,L3 or later,,,,all,0,0,1,4 -dlhf_l,surface_downward_latent_heat_flux,Latent heat flux (lower boom),W m-2,modelResult,time lat lon alt,FALSE,L3 or later,,,,two-boom,0,0,1,4 -dshf_u,surface_downward_sensible_heat_flux,Sensible heat flux (upper boom),W m-2,modelResult,time lat lon alt,FALSE,L3 or later,,,,all,0,0,1,4 -dshf_l,surface_downward_sensible_heat_flux,Sensible heat flux (lower boom),W m-2,modelResult,time lat lon alt,FALSE,L3 or later,,,,two-boom,0,0,1,4 -z_boom_u,distance_to_surface_from_boom,Upper boom height,m,physicalMeasurement,time lat lon alt,TRUE,,0.3,10,dshf_u dlhf_u qh_u,all,1,1,1,4 -z_boom_q_u,distance_to_surface_from_boom_quality,Upper boom height (quality),-,qualityInformation,time lat lon alt,TRUE,L0 or L2,,,,all,1,1,0,4 -z_boom_l,distance_to_surface_from_boom,Lower boom height,m,physicalMeasurement,time lat lon alt,TRUE,,0.3,5,dshf_l dlhf_l qh_l,two-boom,1,1,1,4 -z_boom_q_l,distance_to_surface_from_boom_quality,Lower boom height 
(quality),-,qualityInformation,time lat lon alt,TRUE,L0 or L2,,,,two-boom,1,1,0,4 -z_stake,distance_to_surface_from_stake_assembly,Stake height,m,physicalMeasurement,time lat lon alt,TRUE,,0.3,8,,one-boom,1,1,1,4 -z_stake_q,distance_to_surface_from_stake_assembly_quality,Stake height (quality),-,qualityInformation,time lat lon alt,TRUE,L0 or L2,,,,one-boom,1,1,0,4 -z_pt,depth_of_pressure_transducer_in_ice,Depth of pressure transducer in ice,m,physicalMeasurement,time lat lon alt,FALSE,,0,30,z_pt_cor,one-boom,1,1,1,4 -z_pt_cor,depth_of_pressure_transducer_in_ice_corrected,Depth of pressure transducer in ice - corrected,m,modelResult,time lat lon alt,FALSE,L2 or later,0,30,,one-boom,0,1,1,4 -precip_u,precipitation,Precipitation (upper boom) (cumulative solid & liquid),mm,physicalMeasurement,time lat lon alt,TRUE,,0,,precip_u_cor precip_u_rate,all,1,1,1,4 -precip_u_cor,precipitation_corrected,Precipitation (upper boom) (cumulative solid & liquid) – corrected,mm,modelResult,time lat lon alt,TRUE,L2 or later,0,,,all,0,1,1,4 -precip_u_rate,precipitation_rate,Precipitation rate (upper boom) (cumulative solid & liquid) – corrected,mm,modelResult,time lat lon alt,TRUE,L2 or later,0,,,all,0,1,1,4 -precip_l,precipitation,Precipitation (lower boom) (cumulative solid & liquid),mm,physicalMeasurement,time lat lon alt,TRUE,,0,,precip_l_cor precip_l_rate,two-boom,1,1,1,4 -precip_l_cor,precipitation_corrected,Precipitation (lower boom) (cumulative solid & liquid) – corrected,mm,modelResult,time lat lon alt,TRUE,L2 or later,0,,,two-boom,0,1,1,4 -precip_l_rate,precipitation_rate,Precipitation rate (lower boom) (cumulative solid & liquid) – corrected,mm,modelResult,time lat lon alt,TRUE,L2 or later,0,,,two-boom,0,1,1,4 -t_i_1,ice_temperature_at_t1,Ice temperature at sensor 1,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,all,1,1,1,4 -t_i_2,ice_temperature_at_t2,Ice temperature at sensor 2,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,all,1,1,1,4 -t_i_3,ice_temperature_at_t3,Ice temperature at sensor 3,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,all,1,1,1,4 -t_i_4,ice_temperature_at_t4,Ice temperature at sensor 4,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,all,1,1,1,4 -t_i_5,ice_temperature_at_t5,Ice temperature at sensor 5,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,all,1,1,1,4 -t_i_6,ice_temperature_at_t6,Ice temperature at sensor 6,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,all,1,1,1,4 -t_i_7,ice_temperature_at_t7,Ice temperature at sensor 7,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,all,1,1,1,4 -t_i_8,ice_temperature_at_t8,Ice temperature at sensor 8,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,all,1,1,1,4 -t_i_9,ice_temperature_at_t9,Ice temperature at sensor 9,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,two-boom,1,1,1,4 -t_i_10,ice_temperature_at_t10,Ice temperature at sensor 10,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,two-boom,1,1,1,4 -t_i_11,ice_temperature_at_t11,Ice temperature at sensor 11,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,1,,two-boom,1,1,1,4 -tilt_x,platform_view_angle_x,Tilt to east,degrees,physicalMeasurement,time lat lon alt,FALSE,,-30,30,dsr_cor usr_cor albedo,all,1,1,1,4 -tilt_y,platform_view_angle_y,Tilt to north,degrees,physicalMeasurement,time lat lon alt,FALSE,,-30,30,dsr_cor usr_cor albedo,all,1,1,1,4 -rot,platform_azimuth_angle,Station rotation from true North,degrees,physicalMeasurement,time lat 
lon alt,FALSE,,0,360,,all,1,1,1,2 -gps_lat,gps_latitude,Latitude,degrees_north,coordinate,time lat lon alt,TRUE,,50,83,,all,1,1,1,6 -gps_lon,gps_longitude,Longitude,degrees_east,coordinate,time lat lon alt,TRUE,,5,70,,all,1,1,1,6 -gps_alt,gps_altitude,Altitude,m,coordinate,time lat lon alt,TRUE,,0,3000,,all,1,1,1,2 -gps_time,gps_time,GPS time,s,physicalMeasurement,time lat lon alt,TRUE,L0 or L2,0,240000,,all,1,1,0, -gps_geoid,gps_geoid_separation,Height of EGM96 geoid over WGS84 ellipsoid,m,physicalMeasurement,time lat lon alt,TRUE,L0 or L2,,,,one-boom,1,1,0, -gps_geounit,gps_geounit,GeoUnit,-,qualityInformation,time lat lon alt,TRUE,L0 or L2,,,,all,1,1,0, -gps_hdop,gps_hdop,GPS horizontal dillution of precision (HDOP),m,qualityInformation,time lat lon alt,TRUE,L0 or L2,,,,all,1,1,0,2 -gps_numsat,gps_numsat,GPS number of satellites,-,qualityInformation,time lat lon alt,TRUE,L0 or L2,,,,,1,1,0,0 -gps_q,gps_q,Quality,-,qualityInformation,time lat lon alt,TRUE,L0 or L2,,,,,1,1,0, -lat,gps_mean_latitude,GPS mean latitude (from all time-series),degrees,modelResult,time lat lon alt,TRUE,,,,,all,1,1,1,6 -lon,gps_mean_longitude,GPS mean longitude (from all time-series),degrees,modelResult,time lat lon alt,TRUE,,,,,all,1,1,1,6 -alt,gps_mean_altitude,GPS mean altitude (from all time-series),degrees,modelResult,time lat lon alt,TRUE,,,,,all,1,1,1,6 -batt_v,battery_voltage,Battery voltage,V,physicalMeasurement,time lat lon alt,TRUE,,0,30,,all,1,1,1,2 -batt_v_ini,,,-,physicalMeasurement,time lat lon alt,TRUE,L0 or L2,0,30,,,1,1,0,2 -batt_v_ss,battery_voltage_at_sample_start,Battery voltage (sample start),V,physicalMeasurement,time lat lon alt,TRUE,L0 or L2,0,30,,,1,1,0,2 -fan_dc_u,fan_current,Fan current (upper boom),mA,physicalMeasurement,time lat lon alt,TRUE,L0 or L2,0,200,,all,1,1,0,2 -fan_dc_l,fan_current,Fan current (lower boom),mA,physicalMeasurement,time lat lon alt,TRUE,,0,200,,two-boom,1,1,0,2 -freq_vw,frequency_of_precipitation_wire_vibration,Frequency of vibrating wire in precipitation gauge,Hz,physicalMeasurement,time lat lon alt,TRUE,L0 or L2,0,10000,precip_u,,1,1,0, -t_log,temperature_of_logger,Logger temperature,degrees_C,physicalMeasurement,time lat lon alt,TRUE,,-80,40,,one-boom,1,1,0,4 -t_rad,temperature_of_radiation_sensor,Radiation sensor temperature,degrees_C,physicalMeasurement,time lat lon alt,FALSE,,-80,40,t_surf dlr ulr,all,1,1,1,4 -p_i,air_pressure,Air pressure (instantaneous) minus 1000,hPa,physicalMeasurement,time lat lon alt,TRUE,,-350,100,,all,1,1,1,4 -t_i,air_temperature,Air temperature (instantaneous),degrees_C,physicalMeasurement,time lat lon alt,TRUE,,-80,40,,all,1,1,1,4 -rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,time lat lon alt,TRUE,,0,150,rh_i_cor,all,1,1,1,4 -rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,modelResult,time lat lon alt,TRUE,L2 or later,0,100,,all,0,1,1,4 -wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time lat lon alt,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4 -wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time lat lon alt,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4 -wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time lat lon alt,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 -wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time lat lon alt,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 \ No 
newline at end of file +time,time,Time,yyyy-mm-dd HH:MM:SS,physicalMeasurement,time,,,,,,all,1,1,1, +rec,record,Record,-,referenceInformation,time,,L0 or L2,,,,all,1,1,0,0 +p_u,air_pressure,Air pressure (upper boom),hPa,physicalMeasurement,time,FALSE,,650,1100,z_pt z_pt_cor dshf_u dlhf_u qh_u,all,1,1,1,4 +p_l,air_pressure,Air pressure (lower boom),hPa,physicalMeasurement,time,FALSE,,650,1100,dshf_l dlhf_l qh_l,two-boom,1,1,1,4 +t_u,air_temperature,Air temperature (upper boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,rh_u_cor cc dsr_cor usr_cor z_boom z_stake dshf_u dlhf_u qh_u,all,1,1,1,4 +t_l,air_temperature,Air temperature (lower boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,rh_l_cor z_boom_l dshf_l dlhf_l qh_l,two-boom,1,1,1,4 +rh_u,relative_humidity,Relative humidity (upper boom),%,physicalMeasurement,time,FALSE,,0,100,rh_u_cor,all,1,1,1,4 +rh_u_cor,relative_humidity_corrected,Relative humidity (upper boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,dshf_u dlhf_u qh_u,all,0,1,1,4 +qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,all,0,1,1,4 +rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time,FALSE,,0,100,rh_l_cor,two-boom,1,1,1,4 +rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,dshf_l dlhf_l qh_l,two-boom,0,1,1,4 +qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,two-boom,0,1,1,4 +wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,"wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u, precip_u",all,1,1,1,4 +wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,"wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l qh_l , precip_l",two-boom,1,1,1,4 +wdir_u,wind_from_direction,Wind from direction (upper boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_u wspd_y_u,all,1,1,1,4 +wdir_std_u,wind_from_direction_standard_deviation,Wind from direction (standard deviation),degrees,qualityInformation,time,FALSE,L0 or L2,,,,one-boom,1,1,0,4 +wdir_l,wind_from_direction,Wind from direction (lower boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_l wspd_y_l,two-boom,1,1,1,4 +wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4 +wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4 +wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4 +wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4 +dsr,surface_downwelling_shortwave_flux,Downwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1500,albedo dsr_cor usr_cor,all,1,1,1,4 +dsr_cor,surface_downwelling_shortwave_flux_corrected,Downwelling shortwave radiation - corrected,W m-2,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 +usr,surface_upwelling_shortwave_flux,Upwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1000,albedo dsr_cor usr_cor,all,1,1,1,4 +usr_cor,surface_upwelling_shortwave_flux_corrected,Upwelling shortwave radiation - corrected,W m-2,modelResult,time,FALSE,L2 or later,0,1000,,all,0,1,1,4 
+albedo,surface_albedo,Albedo,-,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 +dlr,surface_downwelling_longwave_flux,Downwelling longwave radiation,W m-2,physicalMeasurement,time,FALSE,,50,500,albedo dsr_cor usr_cor cc t_surf,all,1,1,1,4 +ulr,surface_upwelling_longwave_flux,Upwelling longwave radiation,W m-2,physicalMeasurement,time,FALSE,,50,500,t_surf,all,1,1,1,4 +cc,cloud_area_fraction,Cloud cover,%,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 +t_surf,surface_temperature,Surface temperature,C,modelResult,time,FALSE,L2 or later,-80,40,dshf_u dlhf_u qh_u,all,0,1,1,4 +dlhf_u,surface_downward_latent_heat_flux,Latent heat flux (upper boom),W m-2,modelResult,time,FALSE,L3 or later,,,,all,0,0,1,4 +dlhf_l,surface_downward_latent_heat_flux,Latent heat flux (lower boom),W m-2,modelResult,time,FALSE,L3 or later,,,,two-boom,0,0,1,4 +dshf_u,surface_downward_sensible_heat_flux,Sensible heat flux (upper boom),W m-2,modelResult,time,FALSE,L3 or later,,,,all,0,0,1,4 +dshf_l,surface_downward_sensible_heat_flux,Sensible heat flux (lower boom),W m-2,modelResult,time,FALSE,L3 or later,,,,two-boom,0,0,1,4 +z_boom_u,distance_to_surface_from_boom,Upper boom height,m,physicalMeasurement,time,TRUE,,0.3,10,dshf_u dlhf_u qh_u,all,1,1,1,4 +z_boom_q_u,distance_to_surface_from_boom_quality,Upper boom height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,all,1,1,0,4 +z_boom_l,distance_to_surface_from_boom,Lower boom height,m,physicalMeasurement,time,TRUE,,0.3,5,dshf_l dlhf_l qh_l,two-boom,1,1,1,4 +z_boom_q_l,distance_to_surface_from_boom_quality,Lower boom height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,two-boom,1,1,0,4 +z_stake,distance_to_surface_from_stake_assembly,Stake height,m,physicalMeasurement,time,TRUE,,0.3,8,,one-boom,1,1,1,4 +z_stake_q,distance_to_surface_from_stake_assembly_quality,Stake height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,one-boom,1,1,0,4 +z_pt,depth_of_pressure_transducer_in_ice,Depth of pressure transducer in ice,m,physicalMeasurement,time,FALSE,,0,30,z_pt_cor,one-boom,1,1,1,4 +z_pt_cor,depth_of_pressure_transducer_in_ice_corrected,Depth of pressure transducer in ice - corrected,m,modelResult,time,FALSE,L2 or later,0,30,,one-boom,0,1,1,4 +precip_u,precipitation,Precipitation (upper boom) (cumulative solid & liquid),mm,physicalMeasurement,time,TRUE,,0,,precip_u_cor precip_u_rate,all,1,1,1,4 +precip_u_cor,precipitation_corrected,Precipitation (upper boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,all,0,1,1,4 +precip_u_rate,precipitation_rate,Precipitation rate (upper boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,all,0,1,1,4 +precip_l,precipitation,Precipitation (lower boom) (cumulative solid & liquid),mm,physicalMeasurement,time,TRUE,,0,,precip_l_cor precip_l_rate,two-boom,1,1,1,4 +precip_l_cor,precipitation_corrected,Precipitation (lower boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,two-boom,0,1,1,4 +precip_l_rate,precipitation_rate,Precipitation rate (lower boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,two-boom,0,1,1,4 +t_i_1,ice_temperature_at_t1,Ice temperature at sensor 1,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_2,ice_temperature_at_t2,Ice temperature at sensor 2,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_3,ice_temperature_at_t3,Ice temperature at sensor 3,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 
+t_i_4,ice_temperature_at_t4,Ice temperature at sensor 4,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_5,ice_temperature_at_t5,Ice temperature at sensor 5,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_6,ice_temperature_at_t6,Ice temperature at sensor 6,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_7,ice_temperature_at_t7,Ice temperature at sensor 7,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_8,ice_temperature_at_t8,Ice temperature at sensor 8,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 +t_i_9,ice_temperature_at_t9,Ice temperature at sensor 9,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,two-boom,1,1,1,4 +t_i_10,ice_temperature_at_t10,Ice temperature at sensor 10,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,two-boom,1,1,1,4 +t_i_11,ice_temperature_at_t11,Ice temperature at sensor 11,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,two-boom,1,1,1,4 +tilt_x,platform_view_angle_x,Tilt to east,degrees,physicalMeasurement,time,FALSE,,-30,30,dsr_cor usr_cor albedo,all,1,1,1,4 +tilt_y,platform_view_angle_y,Tilt to north,degrees,physicalMeasurement,time,FALSE,,-30,30,dsr_cor usr_cor albedo,all,1,1,1,4 +rot,platform_azimuth_angle,Station rotation from true North,degrees,physicalMeasurement,time,FALSE,,0,360,,all,1,1,1,2 +gps_lat,gps_latitude,Latitude,degrees_north,coordinate,time,TRUE,,50,83,,all,1,1,1,6 +gps_lon,gps_longitude,Longitude,degrees_east,coordinate,time,TRUE,,5,70,,all,1,1,1,6 +gps_alt,gps_altitude,Altitude,m,coordinate,time,TRUE,,0,3000,,all,1,1,1,2 +gps_time,gps_time,GPS time,s,physicalMeasurement,time,TRUE,L0 or L2,0,240000,,all,1,1,0, +gps_geoid,gps_geoid_separation,Height of EGM96 geoid over WGS84 ellipsoid,m,physicalMeasurement,time,TRUE,L0 or L2,,,,one-boom,1,1,0, +gps_geounit,gps_geounit,GeoUnit,-,qualityInformation,time,TRUE,L0 or L2,,,,all,1,1,0, +gps_hdop,gps_hdop,GPS horizontal dillution of precision (HDOP),m,qualityInformation,time,TRUE,L0 or L2,,,,all,1,1,0,2 +gps_numsat,gps_numsat,GPS number of satellites,-,qualityInformation,time,TRUE,L0 or L2,,,,,1,1,0,0 +gps_q,gps_q,Quality,-,qualityInformation,time,TRUE,L0 or L2,,,,,1,1,0, +lat,latitude_postprocessed,smoothed and interpolated latitude of station (best estimate),degrees_N,modelResult,time,TRUE,L3,,,,all,0,0,1,6 +lon,longitude_postprocessed,smoothed and interpolated longitude of station (best estimate),degrees_E,modelResult,time,TRUE,L3,,,,all,0,0,1,6 +alt,altitude_postprocessed,smoothed and interpolated altitude of station (best estimate),m,modelResult,time,TRUE,L3,,,,all,0,0,1,2 +batt_v,battery_voltage,Battery voltage,V,physicalMeasurement,time,TRUE,,0,30,,all,1,1,1,2 +batt_v_ini,,,-,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2 +batt_v_ss,battery_voltage_at_sample_start,Battery voltage (sample start),V,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2 +fan_dc_u,fan_current,Fan current (upper boom),mA,physicalMeasurement,time,TRUE,L0 or L2,0,200,,all,1,1,0,2 +fan_dc_l,fan_current,Fan current (lower boom),mA,physicalMeasurement,time,TRUE,,0,200,,two-boom,1,1,0,2 +freq_vw,frequency_of_precipitation_wire_vibration,Frequency of vibrating wire in precipitation gauge,Hz,physicalMeasurement,time,TRUE,L0 or L2,0,10000,precip_u,,1,1,0, +t_log,temperature_of_logger,Logger temperature,degrees_C,physicalMeasurement,time,TRUE,,-80,40,,one-boom,1,1,0,4 +t_rad,temperature_of_radiation_sensor,Radiation sensor temperature,degrees_C,physicalMeasurement,time,FALSE,,-80,40,t_surf dlr ulr,all,1,1,1,4 +p_i,air_pressure,Air 
pressure (instantaneous) minus 1000,hPa,physicalMeasurement,time,TRUE,,-350,100,,all,1,1,1,4 +t_i,air_temperature,Air temperature (instantaneous),degrees_C,physicalMeasurement,time,TRUE,,-80,40,,all,1,1,1,4 +rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,time,TRUE,,0,150,rh_i_cor,all,1,1,1,4 +rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4 +wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4 +wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4 +wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 +wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 \ No newline at end of file From 94d7f9375323b199490899f267c419fbb65d67b7 Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Thu, 27 Jun 2024 10:25:28 +0200 Subject: [PATCH 02/14] encoding info removal when reading netcdf --- src/pypromice/process/get_l2tol3.py | 4 ++-- src/pypromice/process/join_l2.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index 6f845287..c2897623 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -38,8 +38,8 @@ def get_l2tol3(inpath, outpath, variables, metadata): # Remove encoding attributes from NetCDF for varname in l2.variables: if l2[varname].encoding!={}: - l2[varname].encoding = {} - + l2[varname].encoding = {} + if 'bedrock' in l2.attrs.keys(): l2.attrs['bedrock'] = l2.attrs['bedrock'] == 'True' if 'number_of_booms' in l2.attrs.keys(): diff --git a/src/pypromice/process/join_l2.py b/src/pypromice/process/join_l2.py index 06f3d8c5..6a4a1109 100644 --- a/src/pypromice/process/join_l2.py +++ b/src/pypromice/process/join_l2.py @@ -29,7 +29,7 @@ def loadArr(infile): elif infile.split('.')[-1].lower() == 'nc': with xr.open_dataset(infile) as ds: ds.load() - # Remove encoding attributes from NetCDF + # Remove encoding attributes from NetCDF for varname in ds.variables: if ds[varname].encoding!={}: ds[varname].encoding = {} From 2251a2074b4ece4fb313a4f211274547684581a3 Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Thu, 27 Jun 2024 10:53:10 +0200 Subject: [PATCH 03/14] typo in statsmodels --- environment.yml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index f2717aa9..96a569f4 100644 --- a/environment.yml +++ b/environment.yml @@ -72,7 +72,7 @@ dependencies: - setuptools=68.2.2=py38h06a4308_0 - six=1.16.0=pyh6c4a22f_0 - sqlite=3.41.2=h5eee18b_0 - - statsmodel=0.13.2=py39h2bbff1b_0 + - statsmodels=0.13.2=py39h2bbff1b_0 - tbb=2021.8.0=hdb19cb5_0 - threadpoolctl=3.4.0=pyhc1e730c_0 - tk=8.6.12=h1ccaba5_0 diff --git a/setup.py b/setup.py index fb24b5f1..ce8c9bd4 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ "pypromice.qc.percentiles": ["thresholds.csv"], "pypromice.postprocess": ["station_configurations.toml", "positions_seed.csv"], }, - install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0', 
'statsmodel==0.13.2'],
+    install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0', 'statsmodels==0.13.2'],
     # extras_require={'postprocess': ['eccodes','scikit-learn>=1.1.0']},
     entry_points={
     'console_scripts': [

From 29a7bde6173cd3517bd072b003e684e8d44b2bb3 Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Fri, 28 Jun 2024 14:49:48 +0200
Subject: [PATCH 04/14] rework get_l2tol3 and l2tol3

- both functions now take the station_configuration config file as input
- added some error handling in l2tol3.py for when variables required for
  the processing are not available
- breaks in station locations are now listed in the station_configurations
  file
- the smoothed lat/lon/alt between two breaks (i.e. between two station
  relocations) is now a linear fit rather than a lowess smoothing

---
 src/pypromice/process/L2toL3.py     | 234 ++++++++++++++++------------
 src/pypromice/process/get_l2tol3.py |   6 +-
 2 files changed, 136 insertions(+), 104 deletions(-)

diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py
index 0db1dffb..b6699b1d 100755
--- a/src/pypromice/process/L2toL3.py
+++ b/src/pypromice/process/L2toL3.py
@@ -5,11 +5,13 @@
 import pandas as pd
 import numpy as np
 import xarray as xr
-from statsmodels.nonparametric.smoothers_lowess import lowess
+import toml
+from sklearn.linear_model import LinearRegression
+
 import logging
 logger = logging.getLogger(__name__)
 
-def toL3(L2, T_0=273.15):
+def toL3(L2, T_0=273.15, config_folder='../aws-l0/metadata/station_configurations/'):
     '''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating
     all derived variables:
         - Turbulent fluxes
@@ -29,48 +31,68 @@ def toL3(L2, T_0=273.15):
     T_100 = _getTempK(T_0)  # Get steam point temperature as K
 
     # Turbulent heat flux calculation
-    # Upper boom bulk calculation
-    T_h_u = ds['t_u'].copy()  # Copy for processing
-    p_h_u = ds['p_u'].copy()
-    WS_h_u = ds['wspd_u'].copy()
-    RH_cor_h_u = ds['rh_u_cor'].copy()
-    Tsurf_h = ds['t_surf'].copy()  # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters?
-    z_WS_u = ds['z_boom_u'].copy() + 0.4  # Get height of Anemometer
-    z_T_u = ds['z_boom_u'].copy() - 0.1  # Get height of thermometer
+    if ('t_u' in ds.keys()) and \
+        ('p_u' in ds.keys()) and \
+        ('rh_u_cor' in ds.keys()):
+        # Upper boom bulk calculation
+        T_h_u = ds['t_u'].copy()  # Copy for processing
+        p_h_u = ds['p_u'].copy()
+        RH_cor_h_u = ds['rh_u_cor'].copy()
+
+        q_h_u = calcSpHumid(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u)  # Calculate specific humidity
+        if ('wspd_u' in ds.keys()) and \
+            ('t_surf' in ds.keys()) and \
+            ('z_boom_u' in ds.keys()):
+            WS_h_u = ds['wspd_u'].copy()
+            Tsurf_h = ds['t_surf'].copy()  # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters?
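+            # Heights of the wind and temperature sensors used in the bulk
+            # flux profiles: the anemometer sits ~0.4 m above the boom and
+            # the thermometer ~0.1 m below it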
+            z_WS_u = ds['z_boom_u'].copy() + 0.4  # Get height of Anemometer
+            z_T_u = ds['z_boom_u'].copy() - 0.1  # Get height of thermometer
+
+            if not ds.attrs['bedrock']:
+                SHF_h_u, LHF_h_u= calcHeatFlux(T_0, T_h_u, Tsurf_h, WS_h_u,  # Calculate latent and sensible heat fluxes
+                                z_WS_u, z_T_u, q_h_u, p_h_u)
 
-    q_h_u = calcSpHumid(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u)  # Calculate specific humidity
+                ds['dshf_u'] = (('time'), SHF_h_u.data)
+                ds['dlhf_u'] = (('time'), LHF_h_u.data)
+            else:
+                logger.info('wspd_u, t_surf or z_boom_u missing, cannot calculate turbulent heat fluxes')
 
-    if not ds.attrs['bedrock']:
-        SHF_h_u, LHF_h_u= calcHeatFlux(T_0, T_h_u, Tsurf_h, WS_h_u,  # Calculate latent and sensible heat fluxes
-                                       z_WS_u, z_T_u, q_h_u, p_h_u)
-
-        ds['dshf_u'] = (('time'), SHF_h_u.data)
-        ds['dlhf_u'] = (('time'), LHF_h_u.data)
+        q_h_u = 1000 * q_h_u  # Convert sp.humid from kg/kg to g/kg
+        ds['qh_u'] = (('time'), q_h_u.data)
+    else:
+        logger.info('t_u, p_u or rh_u_cor missing, cannot calculate turbulent heat fluxes')
 
-    q_h_u = 1000 * q_h_u  # Convert sp.humid from kg/kg to g/kg
-    ds['qh_u'] = (('time'), q_h_u.data)
 
     # Lower boom bulk calculation
-    if ds.attrs['number_of_booms']==2:
+    if ds.attrs['number_of_booms']==2:
+        if ('t_l' in ds.keys()) and \
+            ('p_l' in ds.keys()) and \
+            ('rh_l_cor' in ds.keys()):
+            T_h_l = ds['t_l'].copy()  # Copy for processing
+            p_h_l = ds['p_l'].copy()
+            RH_cor_h_l = ds['rh_l_cor'].copy()
 
-        T_h_l = ds['t_l'].copy()  # Copy for processing
-        p_h_l = ds['p_l'].copy()
-        WS_h_l = ds['wspd_l'].copy()
-        RH_cor_h_l = ds['rh_l_cor'].copy()
-        z_WS_l = ds['z_boom_l'].copy() + 0.4  # Get height of W
-        z_T_l = ds['z_boom_l'].copy() - 0.1  # Get height of thermometer
-
-        q_h_l = calcSpHumid(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l)  # Calculate sp.humidity
+            q_h_l = calcSpHumid(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l)  # Calculate sp.humidity
 
-        if not ds.attrs['bedrock']:
-            SHF_h_l, LHF_h_l= calcHeatFlux(T_0, T_h_l, Tsurf_h, WS_h_l,  # Calculate latent and sensible heat fluxes
-                                           z_WS_l, z_T_l, q_h_l, p_h_l)
+            if ('wspd_l' in ds.keys()) and \
+                ('t_surf' in ds.keys()) and \
+                ('z_boom_l' in ds.keys()):
+                z_WS_l = ds['z_boom_l'].copy() + 0.4  # Get height of W
+                z_T_l = ds['z_boom_l'].copy() - 0.1  # Get height of thermometer
+                WS_h_l = ds['wspd_l'].copy()
 
-            ds['dshf_l'] = (('time'), SHF_h_l.data)
-            ds['dlhf_l'] = (('time'), LHF_h_l.data)
+                if not ds.attrs['bedrock']:
+                    SHF_h_l, LHF_h_l= calcHeatFlux(T_0, T_h_l, Tsurf_h, WS_h_l,  # Calculate latent and sensible heat fluxes
+                                    z_WS_l, z_T_l, q_h_l, p_h_l)
+
+                    ds['dshf_l'] = (('time'), SHF_h_l.data)
+                    ds['dlhf_l'] = (('time'), LHF_h_l.data)
+                else:
+                    logger.info('wspd_l, t_surf or z_boom_l missing, cannot calculate turbulent heat fluxes')
+                q_h_l = 1000 * q_h_l  # Convert sp.humid from kg/kg to g/kg
+                ds['qh_l'] = (('time'), q_h_l.data)
+            else:
+                logger.info('t_l, p_l or rh_l_cor missing, cannot calculate turbulent heat fluxes')
 
-        q_h_l = 1000 * q_h_l  # Convert sp.humid from kg/kg to g/kg
-        ds['qh_l'] = (('time'), q_h_l.data)
-
+
     # Smoothing and inter/extrapolation of GPS coordinates
     for var in ['gps_lat', 'gps_lon', 'gps_alt']:
         ds[var.replace('gps_','')] = gpsCoordinatePostprocessing(ds, var)
@@ -80,7 +102,7 @@ def toL3(L2, T_0=273.15):
         ds.attrs[v+'_avg'] = ds[v].mean(dim='time').item()
     return ds
 
-def gpsCoordinatePostprocessing(ds, var):
+def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/station_configurations/'):
     # saving the static value of 'lat','lon' or 'alt' stored in attribute
     # as it might be the only coordinate available for certain stations (e.g.
bedrock) var_out = var.replace('gps_','') @@ -89,7 +111,7 @@ def gpsCoordinatePostprocessing(ds, var): if 'altitude' in list(ds.attrs.keys()): static_value = float(ds.attrs['altitude']) else: - print('no standard altitude for', ds.attrs['station_id']) + # print('no standard altitude for', ds.attrs['station_id']) static_value = np.nan elif var_out == 'lat': static_value = float(ds.attrs['latitude']) @@ -106,16 +128,22 @@ def gpsCoordinatePostprocessing(ds, var): print('no',var,'at',ds.attrs['station_id']) return ('time', np.ones_like(ds['t_u'].data)*static_value) - # here we detect potential relocation of the station in the form of a - # break in the general trend of the latitude, longitude and altitude - # in the future, this could/should be listed in an external file to - # avoid missed relocations or sensor issues interpreted as a relocation - if var == 'gps_alt': - _, breaks = find_breaks(ds[var].to_series(), alpha=8) - else: - _, breaks = find_breaks(ds[var].to_series(), alpha=6) + # fetching the station relocation dates at which the coordinates will/should + # have a break + config_file = config_folder +"/" + ds.attrs['station_id'] + ".toml" + with open(config_file, "r") as f: + config_data = toml.load(f) + + # Extract station relocations from the TOML data + station_relocations = config_data.get("station_relocation", []) - # smoothing and inter/extrapolation of the coordinate + # Convert the ISO8601 strings to pandas datetime objects + breaks = [pd.to_datetime(date_str) for date_str in station_relocations] + if len(breaks)==0: + logger.info('processing '+var+' without relocation') + else: + logger.info('processing '+var+' with relocation on ' + ', '.join([br.strftime('%Y-%m-%dT%H:%M:%S') for br in breaks])) + return ('time', piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks)) @@ -365,75 +393,77 @@ def calcSpHumid(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, ep # Convert to kg/kg return RH_cor_h * q_sat / 100 - -def find_breaks(df,alpha): - '''Detects potential relocation of the station from the GPS measurements. - The code first makes a forward linear interpolation of the coordinates and - then looks for important jumps in latitude, longitude and altitude. The jumps - that are higher than a given threshold (expressed as a multiple of the - standard deviation) are mostly caused by the station being moved during - maintenance. To avoid misclassification, only the jumps detected in May-Sept. - are kept. - - Parameters - ---------- - df : pandas.Series - series of observed latitude, longitude or elevation - alpha: float - coefficient to be applied to the the standard deviation of the daily - coordinate fluctuation - ''' - diff = df.resample('D').median().interpolate( - method='linear', limit_area='inside', limit_direction='forward').diff() - thresh = diff.std() * alpha - list_diff = diff.loc[diff.abs()>thresh].reset_index() - list_diff = list_diff.loc[list_diff.time.dt.month.isin([5,6,7,8,9])] - list_diff['year']=list_diff.time.dt.year - list_diff=list_diff.groupby('year').max() - return diff, [None]+list_diff.time.to_list()+[None] - - def piecewise_smoothing_and_interpolation(df_in, breaks): - '''Smoothes, inter- or extrapolate the gps observations. The processing is - done piecewise so that each period between station relocation are done - separately (no smoothing of the jump due to relocation). Locally Weighted - Scatterplot Smoothing (lowess) is then used to smooth the available - observations. 
Then this smoothed curve is interpolated linearly over internal
-    gaps. Eventually, this interpolated curve is extrapolated linearly for
-    timestamps before the first valid measurement and after the last valid
-    measurement.
+    '''Smooths, inter- or extrapolates the GPS observations. The processing is
+    done piecewise so that each period between station relocations is processed
+    separately (no smoothing of the jump due to relocation). Piecewise linear
+    regression is then used to smooth the available observations. Then this
+    smoothed curve is interpolated linearly over internal gaps. Finally, this
+    interpolated curve is extrapolated linearly for timestamps before the first
+    valid measurement and after the last valid measurement.
 
     Parameters
     ----------
     df_in : pandas.Series
-        series of observed latitude, longitude or elevation
+        Series of observed latitude, longitude or elevation with datetime index.
     breaks: list
-        List of timestamps of station relocation. First and last item should be
-        None so that they can be used in slice(breaks[i], breaks[i+1])
+        List of timestamps of station relocation. None sentinels are appended
+        internally so that slice(breaks[i], breaks[i+1]) also covers the periods
+        before the first and after the last relocation.
+
+    Returns
+    -------
+    np.ndarray
+        Smoothed and interpolated values corresponding to the input series.
     '''
-    df_all = pd.Series()  # dataframe gathering all the smoothed pieces
-    for i in range(len(breaks)-1):
+    df_all = pd.Series(dtype=float)  # Initialize an empty Series to gather all smoothed pieces
+    breaks = [None] + breaks + [None]
+    for i in range(len(breaks) - 1):
         df = df_in.loc[slice(breaks[i], breaks[i+1])].copy()
+
+        # Keep only the valid (non-NaN) observations within this segment
+        df_valid = df.dropna()
+        if df_valid.shape[0] > 2:
+            # Fit a linear regression model to the valid data range
+            x = pd.to_numeric(df_valid.index).values.reshape(-1, 1)
+            y = df_valid.values.reshape(-1, 1)
+
+            model = LinearRegression()
+            model.fit(x, y)
+
+            # Predict using the model over the entire segment range
+            x_pred = pd.to_numeric(df.index).values.reshape(-1, 1)
+
+            y_pred = model.predict(x_pred)
+            df = pd.Series(y_pred.flatten(), index=df.index)
+
 
-        y_sm = lowess(df,
-                      pd.to_numeric(df.index),
-                      is_sorted=True, frac=1/3, it=0,
-                      )
-        df.loc[df.notnull()] = y_sm[:,1]
-        df = df.interpolate(method='linear', limit_area='inside')
-
-        last_valid_6_months = slice(df.last_valid_index()-pd.to_timedelta('180D'),None)
-        df.loc[last_valid_6_months] = (df.loc[last_valid_6_months].interpolate( axis=0,
-            method='spline',order=1, limit_direction='forward', fill_value="extrapolate")).values
-
-        first_valid_6_months = slice(None, df.first_valid_index()+pd.to_timedelta('180D'))
-        df.loc[first_valid_6_months] = (df.loc[first_valid_6_months].interpolate( axis=0,
-            method='spline',order=1, limit_direction='backward', fill_value="extrapolate")).values
-        df_all=pd.concat((df_all, df))
-
+        # Update df_all with the predicted values for the current segment
+        df_all = pd.concat([df_all, df])
+
+    # Fill internal gaps with linear interpolation
+    df_all = df_all.interpolate(method='linear', limit_area='inside')
+
+    # Extrapolate for timestamps before the first valid measurement
+    first_valid_6_months = slice(None, df_in.first_valid_index() + pd.to_timedelta('180D'))
+    df_all.loc[first_valid_6_months] = (
+        df_all.loc[first_valid_6_months].interpolate(
+            method='linear', limit_direction='backward', fill_value="extrapolate"
+        )
+    ).values
+
+    # Extrapolate for timestamps after the last valid measurement
+    last_valid_6_months = slice(df_in.last_valid_index() - pd.to_timedelta('180D'), None)
+    df_all.loc[last_valid_6_months] = (
+        df_all.loc[last_valid_6_months].interpolate(
method='linear', limit_direction='forward', fill_value="extrapolate" + ) + ).values + + # Remove duplicate indices and return values as numpy array df_all = df_all[~df_all.index.duplicated(keep='first')] return df_all.values + def _calcAtmosDens(p_h, R_d, T_h, T_0): # TODO: check this shouldn't be in this step somewhere '''Calculate atmospheric density''' return 100 * p_h / R_d / (T_h + T_0) diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index c2897623..67ff7616 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -12,6 +12,8 @@ def parse_arguments_l2tol3(debug_args=None): parser = ArgumentParser(description="AWS L3 script for the processing L3 "+ "data from L2. An hourly, daily and monthly L3 "+ "data product is outputted to the defined output path") + parser.add_argument('-c', '--config_folder', type=str, required=True, + help='Path to folder with sites configuration (TOML) files') parser.add_argument('-i', '--inpath', type=str, required=True, help='Path to Level 2 .nc data file') parser.add_argument('-o', '--outpath', default=None, type=str, required=False, @@ -24,7 +26,7 @@ def parse_arguments_l2tol3(debug_args=None): args = parser.parse_args(args=debug_args) return args -def get_l2tol3(inpath, outpath, variables, metadata): +def get_l2tol3(config_folder, inpath, outpath, variables, metadata): logging.basicConfig( format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", level=logging.INFO, @@ -46,7 +48,7 @@ def get_l2tol3(inpath, outpath, variables, metadata): l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms']) # Perform Level 3 processing - l3 = toL3(l2) + l3 = toL3(l2, config_folder) # Write Level 3 dataset to file if output directory given v = getVars(variables) From fe8f11dad3c0bb3a12f361b2b56a8dd1e221dccf Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Fri, 28 Jun 2024 22:22:27 +0200 Subject: [PATCH 05/14] fixed arguments in get_l2tol3.py and logger in join_l3 --- src/pypromice/process/L2toL3.py | 2 +- src/pypromice/process/get_l2tol3.py | 2 +- src/pypromice/process/join_l3.py | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index b6699b1d..ae553f3d 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -11,7 +11,7 @@ import logging logger = logging.getLogger(__name__) -def toL3(L2, T_0=273.15, config_folder='../aws-l0/metadata/station_configurations/'): +def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273.15): '''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating all derived variables: - Turbulent fluxes diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index 67ff7616..6e5ba059 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -61,7 +61,7 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata): def main(): args = parse_arguments_l2tol3() - _ = get_l2tol3(args.inpath, args.outpath, args.variables, args.metadata) + _ = get_l2tol3(args.config_folder, args.inpath, args.outpath, args.variables, args.metadata) if __name__ == "__main__": main() diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index e062911c..3e38ae97 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -6,6 +6,11 @@ import numpy as np import pandas as pd import xarray as xr 
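+# Configure logging at import time so that progress and error messages
+# from this module are printed to stdout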
+logging.basicConfig(
+    format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
+    level=logging.INFO,
+    stream=sys.stdout,
+)
 logger = logging.getLogger(__name__)
 
 def parse_arguments_joinl3(debug_args=None):
@@ -202,16 +207,16 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
             filepath = os.path.join(folder_l3, stid, stid+'_hour.nc')
             isNead = False
-            if station_info["project"].lower() in ["historical gc-net", "glaciobasis"]:
+            if station_info["project"].lower() in ["historical gc-net"]:
                 filepath = os.path.join(folder_gcnet, stid+'.csv')
                 isNead = True
-            if not os.path.isfile(filepath): 
+            if not os.path.isfile(filepath):
                 logger.info(stid+' is from an project '+folder_l3+' or '+folder_gcnet)
                 continue
             l3, _ = loadArr(filepath, isNead)
             list_station_data.append((l3, station_info))
-    
+
     # Sort the list in reverse chronological order so that we start with the latest data
     sorted_list_station_data = sorted(list_station_data, key=lambda x: x[0].time.max(), reverse=True)
     sorted_stids = [info["stid"] for _, info in sorted_list_station_data]
@@ -280,6 +285,8 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
 
 
     # Assign site id
+    if l3_merged == None:
+        logger.error('No level 2 data file found for '+site)
     l3_merged.attrs['site_id'] = site
     l3_merged.attrs['stations'] = ' '.join(sorted_stids)
     l3_merged.attrs['level'] = 'L3'

From 5e02f5ecc40c4ebd2f1f6b6cb1d4c26e7c35dd97 Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Fri, 28 Jun 2024 16:02:05 +0200
Subject: [PATCH 06/14] removing statsmodels from setup and environment

---
 environment.yml | 1 -
 setup.py        | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index 96a569f4..b982311f 100644
--- a/environment.yml
+++ b/environment.yml
@@ -72,7 +72,6 @@ dependencies:
   - setuptools=68.2.2=py38h06a4308_0
   - six=1.16.0=pyh6c4a22f_0
   - sqlite=3.41.2=h5eee18b_0
-  - statsmodels=0.13.2=py39h2bbff1b_0
   - tbb=2021.8.0=hdb19cb5_0
   - threadpoolctl=3.4.0=pyhc1e730c_0
   - tk=8.6.12=h1ccaba5_0
diff --git a/setup.py b/setup.py
index ce8c9bd4..52a9b216 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@
         "pypromice.qc.percentiles": ["thresholds.csv"],
         "pypromice.postprocess": ["station_configurations.toml", "positions_seed.csv"],
     },
-    install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0', 'statsmodels==0.13.2'],
+    install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0'],
     # extras_require={'postprocess': ['eccodes','scikit-learn>=1.1.0']},
     entry_points={
         'console_scripts': [

From 537d88fb07e5b86da58fe09bca3a9ad1d4512091 Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Sat, 29 Jun 2024 11:37:42 +0200
Subject: [PATCH 07/14] fixed level_2 file not found case

---
 src/pypromice/process/join_l3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py
index 3e38ae97..10a2c769 100644
--- a/src/pypromice/process/join_l3.py
+++ b/src/pypromice/process/join_l3.py
@@ -285,7 +285,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
 
 
     # Assign site id
-    if l3_merged == None:
+    if not l3_merged:
         logger.error('No level 2 data file found for '+site)
     l3_merged.attrs['site_id'] = site
     l3_merged.attrs['stations'] = ' 
'.join(sorted_stids)

From 5c452b39906e364ca49f70dc3cc55dd71c9322f9 Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Sat, 29 Jun 2024 13:53:36 +0200
Subject: [PATCH 08/14] removed old lat/lon/alt attribute, replaced by
 lat_avg/lon_avg/alt_avg

---
 src/pypromice/process/write.py | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py
index cc4d8fe5..5f9e4593 100644
--- a/src/pypromice/process/write.py
+++ b/src/pypromice/process/write.py
@@ -100,7 +100,6 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
         writeCSV(out_csv, d2, col_names)
 
     # Write to netcdf file
-    col_names = col_names + ['lat', 'lon', 'alt']
     writeNC(out_nc, d2, col_names)
     logger.info(f'Written to {out_csv}')
     logger.info(f'Written to {out_nc}')
@@ -245,16 +244,24 @@ def addMeta(ds, meta):
     ds : xarray.Dataset
         Dataset with metadata
     '''
-    if 'gps_lon' in ds.keys():
-        ds['lon'] = ds['gps_lon'].mean()
-        ds['lon'].attrs = ds['gps_lon'].attrs
-
-        ds['lat'] = ds['gps_lat'].mean()
-        ds['lat'].attrs = ds['gps_lat'].attrs
-
-        ds['alt'] = ds['gps_alt'].mean()
-        ds['alt'].attrs = ds['gps_alt'].attrs
-
+    if 'lon' in ds.keys():
+        # calculating average coordinates based on the extra-/interpolated coords
+        for v in ['lat','lon','alt']:
+            ds.attrs[v+'_avg'] = ds[v].mean().item()
+        # dropping the less accurate standard coordinates given in the
+        # raw or tx config files
+        for v in ['latitude','longitude']:
+            if v in ds.attrs.keys():
+                del ds.attrs[v]
+    elif 'gps_lon' in ds.keys():
+        # calculating average coordinates based on the measured coords (can be gappy)
+        for v in ['gps_lat','gps_lon','gps_alt']:
+            ds.attrs[v+'_avg'] = ds[v].mean().item()
+        # dropping the less accurate standard coordinates given in the
+        # raw or tx config files
+        for v in ['latitude','longitude']:
+            if v in ds.attrs.keys():
+                del ds.attrs[v]
 
     # Attribute convention for data discovery
     # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3

From d96246c00c69de0f6ba39e2c6ed9afcb7addc9b9 Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Sat, 29 Jun 2024 16:04:03 +0200
Subject: [PATCH 09/14] fixed issues

---
 src/pypromice/process/L2toL3.py        | 19 +++----
 src/pypromice/process/write.py         | 70 ++++++++++++++++++++------
 src/pypromice/qc/github_data_issues.py | 11 ++--
 src/pypromice/qc/persistence.py        |  4 +-
 4 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py
index ae553f3d..cd9e70fa 100755
--- a/src/pypromice/process/L2toL3.py
+++ b/src/pypromice/process/L2toL3.py
@@ -106,17 +106,14 @@ def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/stati
     # saving the static value of 'lat','lon' or 'alt' stored in attribute
     # as it might be the only coordinate available for certain stations (e.g. 
bedrock) var_out = var.replace('gps_','') - - if var_out == 'alt': - if 'altitude' in list(ds.attrs.keys()): - static_value = float(ds.attrs['altitude']) - else: - # print('no standard altitude for', ds.attrs['station_id']) - static_value = np.nan - elif var_out == 'lat': - static_value = float(ds.attrs['latitude']) - elif var_out == 'lon': - static_value = float(ds.attrs['longitude']) + coord_names = {'alt':'altitude', 'lat':'latitude','lon':'longitude'} + + if var_out+'_avg' in list(ds.attrs.keys()): + static_value = float(ds.attrs[var_out+'_avg']) + elif coord_names[var_out] in list(ds.attrs.keys()): + static_value = float(ds.attrs[coord_names[var_out]]) + else: + static_value = np.nan # if there is no gps observations, then we use the static value repeated # for each time stamp diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py index 5f9e4593..e8d9e6a7 100644 --- a/src/pypromice/process/write.py +++ b/src/pypromice/process/write.py @@ -48,7 +48,7 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi if 'gps_lon' in d2.keys(): d2 = reformat_lon(d2) else: - logger.info('%s does not have gpd_lon'%name) + logger.info('%s does not have gps_lon'%name) # Add variable attributes and metadata if vars_df is None: @@ -290,19 +290,61 @@ def addMeta(ds, meta): ds.attrs['date_metadata_modified'] = ds.attrs['date_created'] ds.attrs['processing_level'] = ds.attrs['level'].replace('L','level ') + + if 'lat' in ds.keys(): + lat_min = ds['lat'].min().values + lat_max = ds['lat'].max().values + elif 'gps_lat' in ds.keys(): + lat_min = ds['gps_lat'].min().values + lat_max = ds['gps_lat'].max().values + elif 'latitude' in ds.attrs.keys(): + lat_min = ds.attrs['latitude'] + lat_max = ds.attrs['latitude'] + else: + lat_min =np.nan + lat_max = np.nan + + + if 'lon' in ds.keys(): + lon_min = ds['lon'].min().values + lon_max = ds['lon'].max().values + elif 'gps_lon' in ds.keys(): + lon_min = ds['gps_lon'].min().values + lon_max = ds['gps_lon'].max().values + elif 'longitude' in ds.attrs.keys(): + lon_min = ds.attrs['longitude'] + lon_max = ds.attrs['longitude'] + else: + lon_min =np.nan + lon_max = np.nan + + if 'alt' in ds.keys(): + alt_min = ds['alt'].min().values + alt_max = ds['alt'].max().values + elif 'gps_alt' in ds.keys(): + alt_min = ds['gps_alt'].min().values + alt_max = ds['gps_alt'].max().values + elif 'altitude' in ds.attrs.keys(): + alt_min = ds.attrs['altitude'] + alt_max = ds.attrs['altitude'] + else: + alt_min =np.nan + alt_max = np.nan + ds.attrs['geospatial_bounds'] = "POLYGON((" + \ - f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \ - f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \ - f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \ - f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \ - f"{ds['lat'].min().values} {ds['lon'].min().values}))" - - ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values) - ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values) - ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values) - ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values) - ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values) - ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values) + f"{lat_min} {lon_min}, " + \ + f"{lat_min} {lon_max}, " + \ + f"{lat_max} {lon_max}, " + \ + f"{lat_max} {lon_min}, " + \ + f"{lat_min} {lon_min}))" + + ds.attrs['geospatial_lat_min'] = str(lat_min) + ds.attrs['geospatial_lat_max'] = str(lat_max) + ds.attrs['geospatial_lon_min'] 
= str(lon_min)
+    ds.attrs['geospatial_lon_max'] = str(lon_max)
+    ds.attrs['geospatial_vertical_min'] = str(alt_min)
+    ds.attrs['geospatial_vertical_max'] = str(alt_max)
+
     ds.attrs['geospatial_vertical_positive'] = 'up'
     ds.attrs['time_coverage_start'] = str(ds['time'][0].values)
     ds.attrs['time_coverage_end'] = str(ds['time'][-1].values)
@@ -394,4 +436,4 @@ def reformat_lon(dataset, exempt=['UWN', 'Roof_GEUS', 'Roof_PROMICE']):
     if 'gps_lon' not in dataset.keys():
         return dataset
     dataset['gps_lon'] = dataset['gps_lon'] * -1
-    return dataset
\ No newline at end of file
+    return dataset
diff --git a/src/pypromice/qc/github_data_issues.py b/src/pypromice/qc/github_data_issues.py
index fd4d96e0..41603ed8 100644
--- a/src/pypromice/qc/github_data_issues.py
+++ b/src/pypromice/qc/github_data_issues.py
@@ -65,10 +65,10 @@ def flagNAN(ds_in,
 
         for v in varlist:
             if v in list(ds.keys()):
-                logger.info(f'---> flagging {t0} {t1} {v}')
+                logger.debug(f'---> flagging {t0} {t1} {v}')
                 ds[v] = ds[v].where((ds['time'] < t0) | (ds['time'] > t1))
             else:
-                logger.info(f'---> could not flag {v} not in dataset')
+                logger.debug(f'---> could not flag {v} not in dataset')
 
     return ds
 
@@ -206,13 +206,14 @@ def adjustData(ds,
         t1 = pd.to_datetime(t1, utc=True).tz_localize(None)
 
         index_slice = dict(time=slice(t0, t1))
 
         if len(ds_out[var].loc[index_slice].time.time) == 0:
+            logger.info(f'---> {t0} {t1} {var} {func} {val}')
             logger.info("Time range does not intersect with dataset")
             continue
-        logger.info(f'---> {t0} {t1} {var} {func} {val}')
-
+        else:
+            logger.debug(f'---> {t0} {t1} {var} {func} {val}')
+
         if func == "add":
             ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
             # flagging adjusted values
diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py
index 5f5d55f4..963ff786 100644
--- a/src/pypromice/qc/persistence.py
+++ b/src/pypromice/qc/persistence.py
@@ -83,7 +83,7 @@ def persistence_qc(
             mask = mask & (df[v]<99)
             n_masked = mask.sum()
             n_samples = len(mask)
-            logger.info(
+            logger.debug(
                 f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples"
             )
             # setting outliers to NaN
@@ -96,7 +96,7 @@ def persistence_qc(
 
             n_masked = mask.sum()
             n_samples = len(mask)
-            logger.info(
+            logger.debug(
                 f"Applying persistent QC in {v}. 
Filtering {n_masked}/{n_samples} samples" ) # setting outliers to NaN From d6d17357c184b3cdc6bf0f8c7bbd3e2615107d16 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Mon, 1 Jul 2024 10:58:17 +0200 Subject: [PATCH 10/14] printing nan fields in L2, list old variables to drop on join_l3 fixed warning in join_l3 --- src/pypromice/process/L2toL3.py | 16 ++++--- src/pypromice/process/aws.py | 1 + src/pypromice/process/join_l3.py | 42 +++++++++++-------- src/pypromice/process/write.py | 12 +++++- .../resources/variable_aliases_GC-Net.csv | 6 +-- 5 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index cd9e70fa..50848be6 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np import xarray as xr -import toml +import toml, os from sklearn.linear_model import LinearRegression import logging @@ -128,11 +128,15 @@ def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/stati # fetching the station relocation dates at which the coordinates will/should # have a break config_file = config_folder +"/" + ds.attrs['station_id'] + ".toml" - with open(config_file, "r") as f: - config_data = toml.load(f) - - # Extract station relocations from the TOML data - station_relocations = config_data.get("station_relocation", []) + if os.path.isfile(config_file): + with open(config_file, "r") as f: + config_data = toml.load(f) + + # Extract station relocations from the TOML data + station_relocations = config_data.get("station_relocation", []) + else: + station_relocations = [] + logger.warning('Did not find config file for '+ds.attrs['station_id']+'. 
Assuming no station relocation.') # Convert the ISO8601 strings to pandas datetime objects breaks = [pd.to_datetime(date_str) for date_str in station_relocations] diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 7314a19e..5e337cc7 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -91,6 +91,7 @@ def getL1(self): logger.info('Level 1 processing...') self.L0 = [utilities.addBasicMeta(item, self.vars) for item in self.L0] self.L1 = [toL1(item, self.vars) for item in self.L0] + self.L1.reverse() self.L1A = reduce(xr.Dataset.combine_first, self.L1) def getL2(self): diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index 10a2c769..f4cf0d26 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -105,8 +105,20 @@ def readNead(infile): # combining thermocouple and CS100 temperatures ds['TA1'] = ds['TA1'].combine_first(ds['TA3']) ds['TA2'] = ds['TA2'].combine_first(ds['TA4']) - + ds=ds.rename(var_name) + + standard_vars_to_drop = ["NR", "TA3", "TA4", "TA5", "NR_cor", + "z_surf_1", "z_surf_2", "z_surf_combined", + "TA2m", "RH2m", "VW10m", "SZA", "SAA", + "depth_t_i_1", "depth_t_i_2", "depth_t_i_3", "depth_t_i_4", "depth_t_i_5", + "depth_t_i_6", "depth_t_i_7", "depth_t_i_8", "depth_t_i_9", "depth_t_i_10", "t_i_10m" + ] + standard_vars_to_drop = standard_vars_to_drop + [v for v in list(ds.keys()) if v.endswith("_adj_flag")] + + # Drop the variables if they are present in the dataset + ds = ds.drop_vars([var for var in standard_vars_to_drop if var in ds]) + ds=ds.rename({'timestamp':'time'}) return ds @@ -121,7 +133,8 @@ def loadArr(infile, isNead): ds = xr.Dataset.from_dataframe(df) elif infile.split('.')[-1].lower() in 'nc': - ds = xr.open_dataset(infile) + with xr.open_dataset(infile) as ds: + ds.load() # Remove encoding attributes from NetCDF for varname in ds.variables: if ds[varname].encoding!={}: @@ -211,10 +224,17 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me filepath = os.path.join(folder_gcnet, stid+'.csv') isNead = True if not os.path.isfile(filepath): - logger.info(stid+' is from an project '+folder_l3+' or '+folder_gcnet) + logger.info(stid+' was listed as station but could not be found in '+folder_l3+' nor '+folder_gcnet) continue - l3, _ = loadArr(filepath, isNead) + l3, _ = loadArr(filepath, isNead) + + # removing specific variable from a given file + specific_vars_to_drop = station_info.get("skipped_variables",[]) + if len(specific_vars_to_drop)>0: + logger.info("Skipping %s from %s"%(specific_vars_to_drop, stid)) + l3 = l3.drop_vars([var for var in specific_vars_to_drop if var in l3]) + list_station_data.append((l3, station_info)) # Sort the list in reverse chronological order so that we start with the latest data @@ -251,19 +271,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me for v in l3_merged.data_vars: if v not in l3.data_vars: l3[v] = l3.t_u*np.nan - - # if l3 (older data) has variables that does not have l3_merged (newer data) - # then they are removed from l3 - list_dropped = [] - for v in l3.data_vars: - if v not in l3_merged.data_vars: - if v != 'z_stake': - list_dropped.append(v) - l3 = l3.drop(v) - else: - l3_merged[v] = ('time', l3_merged.t_u.data*np.nan) - logger.info('Unused variables in older dataset: '+' '.join(list_dropped)) - + # saving attributes of station under an attribute called $stid st_attrs = l3_merged.attrs.get('stations_attributes', {}) st_attrs[stid] = 
l3.attrs.copy()
diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py
index e8d9e6a7..922c82d0 100644
--- a/src/pypromice/process/write.py
+++ b/src/pypromice/process/write.py
@@ -63,7 +63,11 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
         d2 = roundValues(d2, vars_df)
 
     # Get variable names to write out
-    col_names = getColNames(vars_df, d2, remove_nan_fields=True)
+    if 'site_id' in d2.attrs.keys():
+        remove_nan_fields = True
+    else:
+        remove_nan_fields = False
+    col_names = getColNames(vars_df, d2, remove_nan_fields=remove_nan_fields)
 
     # Define filename based on resample rate
     t = int(pd.Timedelta((d2['time'][1] - d2['time'][0]).values).total_seconds())
@@ -256,12 +260,16 @@ def addMeta(ds, meta):
     elif 'gps_lon' in ds.keys():
         # calculating average coordinates based on the measured coords (can be gappy)
         for v in ['gps_lat','gps_lon','gps_alt']:
-            ds.attrs[v+'_avg'] = ds[v].mean().item()
+            if v in ds.keys():
+                ds.attrs[v+'_avg'] = ds[v].mean().item()
+            else:
+                ds.attrs[v+'_avg'] = np.nan
         # dropping the less accurate standard coordinates given in the
         # raw or tx config files
         for v in ['latitude','longitude']:
             if v in ds.attrs.keys():
                 del ds.attrs[v]
+
 
     # Attribute convention for data discovery
     # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3
diff --git a/src/pypromice/resources/variable_aliases_GC-Net.csv b/src/pypromice/resources/variable_aliases_GC-Net.csv
index a0b4fe14..d84c0a71 100644
--- a/src/pypromice/resources/variable_aliases_GC-Net.csv
+++ b/src/pypromice/resources/variable_aliases_GC-Net.csv
@@ -47,9 +47,9 @@ t_i_11,
 tilt_x,
 tilt_y,
 rot,
-gps_lat,latitude
-gps_lon,longitude
-gps_alt,elevation
+lat,latitude
+lon,longitude
+alt,elevation
 gps_time,
 gps_geounit,
 gps_hdop,

From f66251fd6df1e5899f9e43de5ce417694b7c9a6f Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux
Date: Tue, 2 Jul 2024 16:50:17 +0200
Subject: [PATCH 11/14] in pandas 1.5 new columns need to be added manually
 before a concat; we now also recalculate dirWindSpd if needed for historical
 data

---
 src/pypromice/process/join_l3.py  |  3 +++
 src/pypromice/process/resample.py | 12 ++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py
index f4cf0d26..cf5b1e16 100644
--- a/src/pypromice/process/join_l3.py
+++ b/src/pypromice/process/join_l3.py
@@ -271,6 +271,9 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
     for v in l3_merged.data_vars:
         if v not in l3.data_vars:
             l3[v] = l3.t_u*np.nan
+    for v in l3.data_vars:
+        if v not in l3_merged.data_vars:
+            l3_merged[v] = l3_merged.t_u*np.nan
 
     # saving attributes of station under an attribute called $stid
     st_attrs = l3_merged.attrs.get('stations_attributes', {})
diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index 7c7e2ed7..698a5fab 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -8,6 +8,7 @@
 import logging
 import numpy as np
 import xarray as xr
+from pypromice.process.L1toL2 import calcDirWindSpeeds
 logger = logging.getLogger(__name__)
 
 def resample_dataset(ds_h, t):
@@ -35,12 +36,15 @@ def resample_dataset(ds_h, t):
 
     # recalculating wind direction from averaged directional wind speeds
     for var in ['wdir_u','wdir_l']:
+        boom = var.split('_')[1]
         if var in df_d.columns:
-            if ('wspd_x_'+var.split('_')[1] in df_d.columns) & ('wspd_y_'+var.split('_')[1] in df_d.columns):
-                df_d[var] = 
_calcWindDir(df_d['wspd_x_'+var.split('_')[1]], - df_d['wspd_y_'+var.split('_')[1]]) + if ('wspd_x_'+boom in df_d.columns) & ('wspd_y_'+boom in df_d.columns): + df_d[var] = _calcWindDir(df_d['wspd_x_'+boom], df_d['wspd_y_'+boom]) else: - logger.info(var+' in dataframe but not wspd_x_'+var.split('_')[1]+' nor wspd_y_'+var.split('_')[1]) + logger.info(var+' in dataframe but not wspd_x_'+boom+' nor wspd_y_'+boom+', recalculating them') + ds_h['wspd_x_'+boom], ds_h['wspd_y_'+boom] = calcDirWindSpeeds(ds_h['wspd_'+boom], ds_h['wdir_'+boom]) + df_d[['wspd_x_'+boom, 'wspd_y_'+boom]] = ds_h[['wspd_x_'+boom, 'wspd_y_'+boom]].to_dataframe().resample(t).mean() + df_d[var] = _calcWindDir(df_d['wspd_x_'+boom], df_d['wspd_y_'+boom]) # recalculating relative humidity from average vapour pressure and average # saturation vapor pressure From 8e32b9cd344ff2086071e38cccf0621ad0ec06c0 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:59:27 +0200 Subject: [PATCH 12/14] not returning tuples, variable renaming and some clean up after review --- src/pypromice/process/L2toL3.py | 45 +++++++++++++-------------------- src/pypromice/process/write.py | 2 +- 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 50848be6..0532d709 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -22,6 +22,10 @@ def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273 ---------- L2 : xarray:Dataset L2 AWS data + config_folder : Dict + Dictionary containing the information necessary for the processing of + L3 variables (relocation dates for coordinates processing, or thermistor + string maintenance date for the thermistors depth) T_0 : int Steam point temperature. Default is 273.15. 
''' @@ -95,7 +99,7 @@ def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273 # Smoothing and inter/extrapolation of GPS coordinates for var in ['gps_lat', 'gps_lon', 'gps_alt']: - ds[var.replace('gps_','')] = gpsCoordinatePostprocessing(ds, var) + ds[var.replace('gps_','')] = ('time', gpsCoordinatePostprocessing(ds, var)) # adding average coordinate as attribute for v in ['lat','lon','alt']: @@ -119,11 +123,11 @@ def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/stati # for each time stamp if var not in ds.data_vars: print('no',var,'at', ds.attrs['station_id']) - return ('time', np.ones_like(ds['t_u'].data)*static_value) + return np.ones_like(ds['t_u'].data)*static_value if ds[var].isnull().all(): print('no',var,'at',ds.attrs['station_id']) - return ('time', np.ones_like(ds['t_u'].data)*static_value) + return np.ones_like(ds['t_u'].data)*static_value # fetching the station relocation dates at which the coordinates will/should # have a break @@ -145,7 +149,7 @@ def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/stati else: logger.info('processing '+var+' with relocation on ' + ', '.join([br.strftime('%Y-%m-%dT%H:%M:%S') for br in breaks])) - return ('time', piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks)) + return piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks) def calcHeatFlux(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, @@ -394,7 +398,7 @@ def calcSpHumid(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, ep # Convert to kg/kg return RH_cor_h * q_sat / 100 -def piecewise_smoothing_and_interpolation(df_in, breaks): +def piecewise_smoothing_and_interpolation(data_series, breaks): '''Smoothes, inter- or extrapolate the GPS observations. The processing is done piecewise so that each period between station relocations are done separately (no smoothing of the jump due to relocation). Piecewise linear @@ -405,7 +409,7 @@ def piecewise_smoothing_and_interpolation(df_in, breaks): Parameters ---------- - df_in : pandas.Series + data_series : pandas.Series Series of observed latitude, longitude or elevation with datetime index. breaks: list List of timestamps of station relocation. 
First and last item should be @@ -418,8 +422,9 @@ def piecewise_smoothing_and_interpolation(df_in, breaks): ''' df_all = pd.Series(dtype=float) # Initialize an empty Series to gather all smoothed pieces breaks = [None] + breaks + [None] + _inferred_series = [] for i in range(len(breaks) - 1): - df = df_in.loc[slice(breaks[i], breaks[i+1])].copy() + df = data_series.loc[slice(breaks[i], breaks[i+1])] # Drop NaN values and calculate the number of segments based on valid data df_valid = df.dropna() @@ -436,32 +441,16 @@ def piecewise_smoothing_and_interpolation(df_in, breaks): y_pred = model.predict(x_pred) df = pd.Series(y_pred.flatten(), index=df.index) - + # adds to list the predicted values for the current segment + _inferred_series.append(df) - # Update df_all with predicted values for the current segment - df_all = pd.concat([df_all, df]) + df_all = pd.concat(_inferred_series) # Fill internal gaps with linear interpolation df_all = df_all.interpolate(method='linear', limit_area='inside') - - # Extrapolate for timestamps before the first valid measurement - first_valid_6_months = slice(None, df_in.first_valid_index() + pd.to_timedelta('180D')) - df_all.loc[first_valid_6_months] = ( - df_all.loc[first_valid_6_months].interpolate( - method='linear', limit_direction='backward', fill_value="extrapolate" - ) - ).values - - # Extrapolate for timestamps after the last valid measurement - last_valid_6_months = slice(df_in.last_valid_index() - pd.to_timedelta('180D'), None) - df_all.loc[last_valid_6_months] = ( - df_all.loc[last_valid_6_months].interpolate( - method='linear', limit_direction='forward', fill_value="extrapolate" - ) - ).values - + # Remove duplicate indices and return values as numpy array - df_all = df_all[~df_all.index.duplicated(keep='first')] + df_all = df_all[~df_all.index.duplicated(keep='last')] return df_all.values diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py index 922c82d0..647d6f75 100644 --- a/src/pypromice/process/write.py +++ b/src/pypromice/process/write.py @@ -443,5 +443,5 @@ def reformat_lon(dataset, exempt=['UWN', 'Roof_GEUS', 'Roof_PROMICE']): if id not in exempt: if 'gps_lon' not in dataset.keys(): return dataset - dataset['gps_lon'] = dataset['gps_lon'] * -1 + dataset['gps_lon'] = np.abs(dataset['gps_lon']) * -1 return dataset From 55b5f3a5fff8c571ef38adc3325f56178a0c6c48 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 3 Jul 2024 11:42:36 +0200 Subject: [PATCH 13/14] read config file in get_l2tol3 and pass only dictionary to L2toL3.py read config file in get_l2tol3 and pass only dictionary to L2toL3.py + fixed doc on L2toL3.py + better reverse of L1 dataset list in aws.py --- src/pypromice/process/L2toL3.py | 52 ++++++++++------------------- src/pypromice/process/aws.py | 3 +- src/pypromice/process/get_l2tol3.py | 9 +++-- 3 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 0532d709..d78161fd 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -11,7 +11,7 @@ import logging logger = logging.getLogger(__name__) -def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273.15): +def toL3(L2, station_config={}, T_0=273.15): '''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating all derived variables: - Turbulent fluxes @@ -22,12 +22,12 @@ def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273 
---------- L2 : xarray:Dataset L2 AWS data - config_folder : Dict + station_config : Dict Dictionary containing the information necessary for the processing of L3 variables (relocation dates for coordinates processing, or thermistor string maintenance date for the thermistors depth) T_0 : int - Steam point temperature. Default is 273.15. + Freezing point temperature. Default is 273.15. ''' ds = L2 ds.attrs['level'] = 'L3' @@ -99,14 +99,14 @@ def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273 # Smoothing and inter/extrapolation of GPS coordinates for var in ['gps_lat', 'gps_lon', 'gps_alt']: - ds[var.replace('gps_','')] = ('time', gpsCoordinatePostprocessing(ds, var)) + ds[var.replace('gps_','')] = ('time', gpsCoordinatePostprocessing(ds, var, station_config)) # adding average coordinate as attribute for v in ['lat','lon','alt']: ds.attrs[v+'_avg'] = ds[v].mean(dim='time').item() return ds -def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/station_configurations/'): +def gpsCoordinatePostprocessing(ds, var, station_config={}): # saving the static value of 'lat','lon' or 'alt' stored in attribute # as it might be the only coordinate available for certain stations (e.g. bedrock) var_out = var.replace('gps_','') @@ -129,18 +129,8 @@ def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/stati print('no',var,'at',ds.attrs['station_id']) return np.ones_like(ds['t_u'].data)*static_value - # fetching the station relocation dates at which the coordinates will/should - # have a break - config_file = config_folder +"/" + ds.attrs['station_id'] + ".toml" - if os.path.isfile(config_file): - with open(config_file, "r") as f: - config_data = toml.load(f) - - # Extract station relocations from the TOML data - station_relocations = config_data.get("station_relocation", []) - else: - station_relocations = [] - logger.warning('Did not find config file for '+ds.attrs['station_id']+'. Assuming no station relocation.') + # Extract station relocations from the TOML data + station_relocations = station_config.get("station_relocation", []) # Convert the ISO8601 strings to pandas datetime objects breaks = [pd.to_datetime(date_str) for date_str in station_relocations] @@ -162,7 +152,7 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, Parameters ---------- T_0 : int - Steam point temperature + Freezing point temperature T_h : xarray.DataArray Air temperature Tsurf_h : xarray.DataArray @@ -175,8 +165,6 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, Height of anemometer z_T : float Height of thermometer - nu : float - Kinematic viscosity of air q_h : xarray.DataArray Specific humidity p_h : xarray.DataArray @@ -191,14 +179,9 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, g : int Gravitational acceleration (m/s2). Default is 9.82. es_0 : int - Saturation vapour pressure at the melting point (hPa). Default is 6.1071. - es_100 : int - Saturation vapour pressure at steam point temperature (hPa). Default is - 1013.246. + Saturation vapour pressure at the melting point (hPa). Default is 6.1071. eps : int Ratio of molar masses of vapor and dry air (0.622). - R_d : int - Gas constant of dry air. Default is 287.05. gamma : int Flux profile correction (Paulson & Dyer). Default is 16.. L_sub : int @@ -219,9 +202,8 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, dd : int Flux profile correction constants (Holtslag & De Bruin '88). Default is 0.35. 
- z_0 : int - Aerodynamic surface roughness length for momention, assumed constant - for all ice/snow surfaces. Default is 0.001. + R_d : int + Gas constant of dry air. Default is 287.05. Returns ------- @@ -358,16 +340,16 @@ def calcSpHumid(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, ep Steam point temperature in Kelvin T_h : xarray.DataArray Air temperature - eps : int - ratio of molar masses of vapor and dry air (0.622) - es_0 : float - Saturation vapour pressure at the melting point (hPa) - es_100 : float - Saturation vapour pressure at steam point temperature (hPa) p_h : xarray.DataArray Air pressure RH_cor_h : xarray.DataArray Relative humidity corrected + es_0 : float + Saturation vapour pressure at the melting point (hPa) + es_100 : float + Saturation vapour pressure at steam point temperature (hPa) + eps : int + ratio of molar masses of vapor and dry air (0.622) Returns ------- diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 5e337cc7..b3db8d9e 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -91,8 +91,7 @@ def getL1(self): logger.info('Level 1 processing...') self.L0 = [utilities.addBasicMeta(item, self.vars) for item in self.L0] self.L1 = [toL1(item, self.vars) for item in self.L0] - self.L1.reverse() - self.L1A = reduce(xr.Dataset.combine_first, self.L1) + self.L1A = reduce(xr.Dataset.combine_first, reversed(self.L1)) def getL2(self): '''Perform L1 to L2 data processing''' diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index 6e5ba059..ca08b17c 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -import os, logging, sys +import os, logging, sys, toml import xarray as xr from argparse import ArgumentParser import pypromice @@ -13,6 +13,7 @@ def parse_arguments_l2tol3(debug_args=None): "data from L2. 
An hourly, daily and monthly L3 "+ "data product is outputted to the defined output path") parser.add_argument('-c', '--config_folder', type=str, required=True, + default='../aws-l0/metadata/station_configurations/', help='Path to folder with sites configuration (TOML) files') parser.add_argument('-i', '--inpath', type=str, required=True, help='Path to Level 2 .nc data file') @@ -47,8 +48,12 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata): if 'number_of_booms' in l2.attrs.keys(): l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms']) + # importing station_config (dict) from config_folder (str path) + config_file = config_folder+l2.attrs['station_id']+'.toml' + with open(config_file) as fp: + station_config = toml.load(fp) # Perform Level 3 processing - l3 = toL3(l2, config_folder) + l3 = toL3(l2, station_config) # Write Level 3 dataset to file if output directory given v = getVars(variables) From d86384f1ee3aa3646ded651b066077366c88dbb8 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Fri, 5 Jul 2024 14:33:05 +0200 Subject: [PATCH 14/14] updated lat, lon units --- src/pypromice/resources/variables.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pypromice/resources/variables.csv b/src/pypromice/resources/variables.csv index 77eaa7b8..78354105 100644 --- a/src/pypromice/resources/variables.csv +++ b/src/pypromice/resources/variables.csv @@ -70,8 +70,8 @@ gps_geounit,gps_geounit,GeoUnit,-,qualityInformation,time,TRUE,L0 or L2,,,,all,1 gps_hdop,gps_hdop,GPS horizontal dillution of precision (HDOP),m,qualityInformation,time,TRUE,L0 or L2,,,,all,1,1,0,2 gps_numsat,gps_numsat,GPS number of satellites,-,qualityInformation,time,TRUE,L0 or L2,,,,,1,1,0,0 gps_q,gps_q,Quality,-,qualityInformation,time,TRUE,L0 or L2,,,,,1,1,0, -lat,latitude_postprocessed,smoothed and interpolated latitude of station (best estimate),degrees_N,modelResult,time,TRUE,L3,,,,all,0,0,1,6 -lon,longitude_postprocessed,smoothed and interpolated longitude of station (best estimate),degrees_E,modelResult,time,TRUE,L3,,,,all,0,0,1,6 +lat,latitude_postprocessed,smoothed and interpolated latitude of station (best estimate),degrees_north,modelResult,time,TRUE,L3,,,,all,0,0,1,6 +lon,longitude_postprocessed,smoothed and interpolated longitude of station (best estimate),degrees_east,modelResult,time,TRUE,L3,,,,all,0,0,1,6 alt,altitude_postprocessed,smoothed and interpolated altitude of station (best estimate),m,modelResult,time,TRUE,L3,,,,all,0,0,1,2 batt_v,battery_voltage,Battery voltage,V,physicalMeasurement,time,TRUE,,0,30,,all,1,1,1,2 batt_v_ini,,,-,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2
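
For reference, the GPS post-processing this series converges on after PATCH 12
reduces to three steps: one linear fit per relocation segment, linear
interpolation over internal gaps only, and de-duplication at the break dates.
Below is a minimal, self-contained sketch of that logic; the station track, the
data gap and the relocation date are all synthetic, not taken from any real
station.

# Sketch of the piecewise logic in piecewise_smoothing_and_interpolation
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

time = pd.date_range('2020-01-01', '2021-12-31', freq='D')
lat = pd.Series(66.0 + 1e-6 * np.arange(time.size), index=time)
lat.loc['2021-03-01':] += 0.01   # jump caused by a (made-up) relocation
lat.iloc[100:160] = np.nan       # gap in the GPS record

breaks = [None, pd.Timestamp('2021-03-01'), None]
pieces = []
for start, end in zip(breaks[:-1], breaks[1:]):
    piece = lat.loc[slice(start, end)]
    valid = piece.dropna()
    if valid.shape[0] > 2:
        # fit the trend on this segment only, so the relocation jump
        # is never smoothed across
        x = pd.to_numeric(valid.index).values.reshape(-1, 1)
        model = LinearRegression().fit(x, valid.values)
        x_all = pd.to_numeric(piece.index).values.reshape(-1, 1)
        piece = pd.Series(model.predict(x_all), index=piece.index)
    pieces.append(piece)

smoothed = pd.concat(pieces).interpolate(method='linear', limit_area='inside')
# the break date falls in two segments; keep the post-relocation value
smoothed = smoothed[~smoothed.index.duplicated(keep='last')]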
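
The relocation dates come from the per-station TOML files that get_l2tol3 now
reads and passes to toL3 as station_config. A hedged sketch of that input,
assuming a minimal file that carries only the keys used in this series; the
station id and the dates are invented for illustration.

import pandas as pd
import toml

station_config = toml.loads('''
stid = "SITE_X"
station_relocation = ["2019-05-01T00:00:00+00:00", "2022-04-20T00:00:00+00:00"]
''')

# the timestamps at which the GPS series is split before smoothing
breaks = [pd.to_datetime(s) for s in station_config.get("station_relocation", [])]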
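
PATCH 13 also swaps the in-place self.L1.reverse() for reversed(self.L1) in the
reduce over xr.Dataset.combine_first. The order matters because combine_first
gives precedence to the calling dataset wherever both datasets hold a value, so
the reduce must start from the dataset that should win. A toy demonstration:

import numpy as np
import xarray as xr

a = xr.Dataset({'t_u': ('time', [1.0, np.nan])}, coords={'time': [0, 1]})
b = xr.Dataset({'t_u': ('time', [9.0, 9.0])}, coords={'time': [0, 1]})

print(a.combine_first(b)['t_u'].values)  # [1. 9.] -> a has priority
print(b.combine_first(a)['t_u'].values)  # [9. 9.] -> b has priority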
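
Finally, the resample.py hunk in PATCH 11 rebuilds wdir from time-averaged wind
components instead of averaging angles directly. The sketch below uses the
standard meteorological decomposition; it is an assumption for illustration,
not code copied from calcDirWindSpeeds or _calcWindDir.

import numpy as np

def dir_wind_speeds(wspd, wdir_deg):
    # split the scalar wind speed into x/y components
    return (wspd * np.sin(np.deg2rad(wdir_deg)),
            wspd * np.cos(np.deg2rad(wdir_deg)))

def wind_dir(wspd_x, wspd_y):
    # recombine (averaged) components into a direction in [0, 360)
    return np.rad2deg(np.arctan2(wspd_x, wspd_y)) % 360

x, y = dir_wind_speeds(np.array([5.0, 5.0]), np.array([350.0, 10.0]))
print(wind_dir(x.mean(), y.mean()))  # ~0.0, while a naive mean of wdir gives 180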