diff --git a/environment.yml b/environment.yml index b982311f..96a569f4 100644 --- a/environment.yml +++ b/environment.yml @@ -72,6 +72,7 @@ dependencies: - setuptools=68.2.2=py38h06a4308_0 - six=1.16.0=pyh6c4a22f_0 - sqlite=3.41.2=h5eee18b_0 + - statsmodels=0.13.2=py39h2bbff1b_0 - tbb=2021.8.0=hdb19cb5_0 - threadpoolctl=3.4.0=pyhc1e730c_0 - tk=8.6.12=h1ccaba5_0 diff --git a/setup.py b/setup.py index 52a9b216..ce8c9bd4 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ "pypromice.qc.percentiles": ["thresholds.csv"], "pypromice.postprocess": ["station_configurations.toml", "positions_seed.csv"], }, - install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0'], + install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0', 'statsmodels==0.13.2'], # extras_require={'postprocess': ['eccodes','scikit-learn>=1.1.0']}, entry_points={ 'console_scripts': [ diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index f265d9b2..ee7b32a8 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -5,14 +5,14 @@ import pandas as pd import numpy as np import xarray as xr -import toml +from statsmodels.nonparametric.smoothers_lowess import lowess from sklearn.linear_model import LinearRegression from pypromice.qc.github_data_issues import adjustData from scipy.interpolate import interp1d import logging, os logger = logging.getLogger(__name__) -def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273.15): +def toL3(L2, T_0=273.15): '''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating all derived variables: - Turbulent fluxes @@ -34,68 +34,48 @@ def toL3(L2, config_folder='../aws-l0/metadata/station_configurations/', T_0=273 T_100 = _getTempK(T_0) # Get steam point temperature as K # Turbulent heat flux calculation - if ('t_u' in ds.keys()) and \ - ('p_u' in ds.keys()) and \ - ('rh_u_cor' in ds.keys()): - # Upper boom bulk calculation - T_h_u = ds['t_u'].copy() # Copy for processing - p_h_u = ds['p_u'].copy() - RH_cor_h_u = ds['rh_u_cor'].copy() - - q_h_u = calcSpHumid(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u) # Calculate specific humidity - if ('wspd_u' in ds.keys()) and \ - ('t_surf' in ds.keys()) and \ - ('z_boom_u' in ds.keys()): - WS_h_u = ds['wspd_u'].copy() - Tsurf_h = ds['t_surf'].copy() # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters? - z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer - z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer - - if not ds.attrs['bedrock']: - SHF_h_u, LHF_h_u= calcHeatFlux(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes - z_WS_u, z_T_u, q_h_u, p_h_u) + # Upper boom bulk calculation + T_h_u = ds['t_u'].copy() # Copy for processing + p_h_u = ds['p_u'].copy() + WS_h_u = ds['wspd_u'].copy() + RH_cor_h_u = ds['rh_u_cor'].copy() + Tsurf_h = ds['t_surf'].copy() # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters? 
+ z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer + z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer - ds['dshf_u'] = (('time'), SHF_h_u.data) - ds['dlhf_u'] = (('time'), LHF_h_u.data) - else: - logger.info('wspd_u, t_surf or z_boom_u missing, cannot calulate tubrulent heat fluxes') + q_h_u = calcSpHumid(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u) # Calculate specific humidity + + if not ds.attrs['bedrock']: + SHF_h_u, LHF_h_u= calcHeatFlux(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes + z_WS_u, z_T_u, q_h_u, p_h_u) - q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg - ds['qh_u'] = (('time'), q_h_u.data) - else: - logger.info('t_u, p_u or rh_u_cor missing, cannot calulate tubrulent heat fluxes') + ds['dshf_u'] = (('time'), SHF_h_u.data) + ds['dlhf_u'] = (('time'), LHF_h_u.data) + + q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg + ds['qh_u'] = (('time'), q_h_u.data) # Lower boom bulk calculation if ds.attrs['number_of_booms']==2: - if ('t_l' in ds.keys()) and \ - ('p_l' in ds.keys()) and \ - ('rh_l_cor' in ds.keys()): - T_h_l = ds['t_l'].copy() # Copy for processing - p_h_l = ds['p_l'].copy() - RH_cor_h_l = ds['rh_l_cor'].copy() - - q_h_l = calcSpHumid(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l) # Calculate sp.humidity - - if ('wspd_l' in ds.keys()) and \ - ('t_surf' in ds.keys()) and \ - ('z_boom_l' in ds.keys()): - z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W - z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer - WS_h_l = ds['wspd_l'].copy() - if not ds.attrs['bedrock']: - SHF_h_l, LHF_h_l= calcHeatFlux(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes - z_WS_l, z_T_l, q_h_l, p_h_l) + T_h_l = ds['t_l'].copy() # Copy for processing + p_h_l = ds['p_l'].copy() + WS_h_l = ds['wspd_l'].copy() + RH_cor_h_l = ds['rh_l_cor'].copy() + z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W + z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer - ds['dshf_l'] = (('time'), SHF_h_l.data) - ds['dlhf_l'] = (('time'), LHF_h_l.data) - else: - logger.info('wspd_l, t_surf or z_boom_l missing, cannot calulate tubrulent heat fluxes') - - q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg - ds['qh_l'] = (('time'), q_h_l.data) - else: - logger.info('t_l, p_l or rh_l_cor missing, cannot calulate tubrulent heat fluxes') + q_h_l = calcSpHumid(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l) # Calculate sp.humidity + + if not ds.attrs['bedrock']: + SHF_h_l, LHF_h_l= calcHeatFlux(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes + z_WS_l, z_T_l, q_h_l, p_h_l) + ds['dshf_l'] = (('time'), SHF_h_l.data) + ds['dlhf_l'] = (('time'), LHF_h_l.data) + q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg + + ds['qh_l'] = (('time'), q_h_l.data) + # Smoothing and inter/extrapolation of GPS coordinates for var in ['gps_lat', 'gps_lon', 'gps_alt']: ds[var.replace('gps_','')] = gpsCoordinatePostprocessing(ds, var) @@ -782,18 +762,21 @@ def interpolate_temperature(dates, depth_cor, temp, depth=10, min_diff_to_depth= return df_interp -def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/station_configurations/'): +def gpsCoordinatePostprocessing(ds, var): # saving the static value of 'lat','lon' or 'alt' stored in attribute # as it might be the only coordinate available for certain stations (e.g. 
bedrock) var_out = var.replace('gps_','') - coord_names = {'alt':'altitude', 'lat':'latitude','lon':'longitude'} - - if var_out+'_avg' in list(ds.attrs.keys()): - static_value = float(ds.attrs[var_out+'_avg']) - elif coord_names[var_out] in list(ds.attrs.keys()): - static_value = float(ds.attrs[coord_names[var_out]]) - else: - static_value = np.nan + + if var_out == 'alt': + if 'altitude' in list(ds.attrs.keys()): + static_value = float(ds.attrs['altitude']) + else: + print('no standard altitude for', ds.attrs['station_id']) + static_value = np.nan + elif var_out == 'lat': + static_value = float(ds.attrs['latitude']) + elif var_out == 'lon': + static_value = float(ds.attrs['longitude']) # if there is no gps observations, then we use the static value repeated # for each time stamp @@ -805,22 +788,16 @@ def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/stati print('no',var,'at',ds.attrs['station_id']) return ('time', np.ones_like(ds['t_u'].data)*static_value) - # fetching the station relocation dates at which the coordinates will/should - # have a break - config_file = config_folder +"/" + ds.attrs['station_id'] + ".toml" - with open(config_file, "r") as f: - config_data = toml.load(f) - - # Extract station relocations from the TOML data - station_relocations = config_data.get("station_relocation", []) - - # Convert the ISO8601 strings to pandas datetime objects - breaks = [pd.to_datetime(date_str) for date_str in station_relocations] - if len(breaks)==0: - logger.info('processing '+var+' without relocation') + # here we detect potential relocation of the station in the form of a + # break in the general trend of the latitude, longitude and altitude + # in the future, this could/should be listed in an external file to + # avoid missed relocations or sensor issues interpreted as a relocation + if var == 'gps_alt': + _, breaks = find_breaks(ds[var].to_series(), alpha=8) else: - logger.info('processing '+var+' with relocation on ' + ', '.join([br.strftime('%Y-%m-%dT%H:%M:%S') for br in breaks])) - + _, breaks = find_breaks(ds[var].to_series(), alpha=6) + + # smoothing and inter/extrapolation of the coordinate return ('time', piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks)) @@ -1070,77 +1047,75 @@ def calcSpHumid(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, ep # Convert to kg/kg return RH_cor_h * q_sat / 100 + +def find_breaks(df,alpha): + '''Detects potential relocation of the station from the GPS measurements. + The code first makes a forward linear interpolation of the coordinates and + then looks for important jumps in latitude, longitude and altitude. The jumps + that are higher than a given threshold (expressed as a multiple of the + standard deviation) are mostly caused by the station being moved during + maintenance. To avoid misclassification, only the jumps detected in May-Sept. + are kept. 
+ + Parameters + ---------- + df : pandas.Series + series of observed latitude, longitude or elevation + alpha: float + coefficient to be applied to the the standard deviation of the daily + coordinate fluctuation + ''' + diff = df.resample('D').median().interpolate( + method='linear', limit_area='inside', limit_direction='forward').diff() + thresh = diff.std() * alpha + list_diff = diff.loc[diff.abs()>thresh].reset_index() + list_diff = list_diff.loc[list_diff.time.dt.month.isin([5,6,7,8,9])] + list_diff['year']=list_diff.time.dt.year + list_diff=list_diff.groupby('year').max() + return diff, [None]+list_diff.time.to_list()+[None] + + def piecewise_smoothing_and_interpolation(df_in, breaks): - '''Smoothes, inter- or extrapolate the GPS observations. The processing is - done piecewise so that each period between station relocations are done - separately (no smoothing of the jump due to relocation). Piecewise linear - regression is then used to smooth the available observations. Then this - smoothed curve is interpolated linearly over internal gaps. Eventually, this - interpolated curve is extrapolated linearly for timestamps before the first - valid measurement and after the last valid measurement. + '''Smoothes, inter- or extrapolate the gps observations. The processing is + done piecewise so that each period between station relocation are done + separately (no smoothing of the jump due to relocation). Locally Weighted + Scatterplot Smoothing (lowess) is then used to smooth the available + observations. Then this smoothed curve is interpolated linearly over internal + gaps. Eventually, this interpolated curve is extrapolated linearly for + timestamps before the first valid measurement and after the last valid + measurement. Parameters ---------- df_in : pandas.Series - Series of observed latitude, longitude or elevation with datetime index. + series of observed latitude, longitude or elevation breaks: list List of timestamps of station relocation. First and last item should be None so that they can be used in slice(breaks[i], breaks[i+1]) - - Returns - ------- - np.ndarray - Smoothed and interpolated values corresponding to the input series. 
''' - df_all = pd.Series(dtype=float) # Initialize an empty Series to gather all smoothed pieces - breaks = [None] + breaks + [None] - for i in range(len(breaks) - 1): + df_all = pd.Series() # dataframe gathering all the smoothed pieces + for i in range(len(breaks)-1): df = df_in.loc[slice(breaks[i], breaks[i+1])].copy() - - # Drop NaN values and calculate the number of segments based on valid data - df_valid = df.dropna() - if df_valid.shape[0] > 2: - # Fit linear regression model to the valid data range - x = pd.to_numeric(df_valid.index).values.reshape(-1, 1) - y = df_valid.values.reshape(-1, 1) - - model = LinearRegression() - model.fit(x, y) - - # Predict using the model for the entire segment range - x_pred = pd.to_numeric(df.index).values.reshape(-1, 1) - - y_pred = model.predict(x_pred) - df = pd.Series(y_pred.flatten(), index=df.index) - - # Update df_all with predicted values for the current segment - df_all = pd.concat([df_all, df]) - - # Fill internal gaps with linear interpolation - df_all = df_all.interpolate(method='linear', limit_area='inside') - - # Extrapolate for timestamps before the first valid measurement - first_valid_6_months = slice(None, df_in.first_valid_index() + pd.to_timedelta('180D')) - df_all.loc[first_valid_6_months] = ( - df_all.loc[first_valid_6_months].interpolate( - method='linear', limit_direction='backward', fill_value="extrapolate" - ) - ).values - - # Extrapolate for timestamps after the last valid measurement - last_valid_6_months = slice(df_in.last_valid_index() - pd.to_timedelta('180D'), None) - df_all.loc[last_valid_6_months] = ( - df_all.loc[last_valid_6_months].interpolate( - method='linear', limit_direction='forward', fill_value="extrapolate" - ) - ).values - - # Remove duplicate indices and return values as numpy array + y_sm = lowess(df, + pd.to_numeric(df.index), + is_sorted=True, frac=1/3, it=0, + ) + df.loc[df.notnull()] = y_sm[:,1] + df = df.interpolate(method='linear', limit_area='inside') + + last_valid_6_months = slice(df.last_valid_index()-pd.to_timedelta('180D'),None) + df.loc[last_valid_6_months] = (df.loc[last_valid_6_months].interpolate( axis=0, + method='spline',order=1, limit_direction='forward', fill_value="extrapolate")).values + + first_valid_6_months = slice(None, df.first_valid_index()+pd.to_timedelta('180D')) + df.loc[first_valid_6_months] = (df.loc[first_valid_6_months].interpolate( axis=0, + method='spline',order=1, limit_direction='backward', fill_value="extrapolate")).values + df_all=pd.concat((df_all, df)) + df_all = df_all[~df_all.index.duplicated(keep='first')] return df_all.values - def _calcAtmosDens(p_h, R_d, T_h, T_0): # TODO: check this shouldn't be in this step somewhere '''Calculate atmospheric density''' return 100 * p_h / R_d / (T_h + T_0) diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index 84a33e20..75be5b41 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -12,8 +12,6 @@ def parse_arguments_l2tol3(debug_args=None): parser = ArgumentParser(description="AWS L3 script for the processing L3 "+ "data from L2. 
An hourly, daily and monthly L3 "+ "data product is outputted to the defined output path") - parser.add_argument('-c', '--config_folder', type=str, required=True, - help='Path to folder with sites configuration (TOML) files') parser.add_argument('-i', '--inpath', type=str, required=True, help='Path to Level 2 .nc data file') parser.add_argument('-o', '--outpath', default=None, type=str, required=False, @@ -26,7 +24,7 @@ def parse_arguments_l2tol3(debug_args=None): args = parser.parse_args(args=debug_args) return args -def get_l2tol3(config_folder, inpath, outpath, variables, metadata): +def get_l2tol3(inpath, outpath, variables, metadata): logging.basicConfig( format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", level=logging.INFO, @@ -48,7 +46,7 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata): l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms']) # Perform Level 3 processing - l3 = toL3(l2, config_folder) + l3 = toL3(l2) # Write Level 3 dataset to file if output directory given v = getVars(variables) @@ -61,7 +59,7 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata): def main(): args = parse_arguments_l2tol3() - _ = get_l2tol3(args.config_folder, args.inpath, args.outpath, args.variables, args.metadata) + _ = get_l2tol3(args.inpath, args.outpath, args.variables, args.metadata) if __name__ == "__main__": main() diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index 10a2c769..e062911c 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -6,11 +6,6 @@ import numpy as np import pandas as pd import xarray as xr -logging.basicConfig( - format="%(asctime)s; %(levelname)s; %(name)s; %(message)s", - level=logging.INFO, - stream=sys.stdout, -) logger = logging.getLogger(__name__) def parse_arguments_joinl3(debug_args=None): @@ -207,16 +202,16 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me filepath = os.path.join(folder_l3, stid, stid+'_hour.nc') isNead = False - if station_info["project"].lower() in ["historical gc-net"]: + if station_info["project"].lower() in ["historical gc-net", "glaciobasis"]: filepath = os.path.join(folder_gcnet, stid+'.csv') isNead = True - if not os.path.isfile(filepath): + if not os.path.isfile(filepath): logger.info(stid+' is from an project '+folder_l3+' or '+folder_gcnet) continue l3, _ = loadArr(filepath, isNead) list_station_data.append((l3, station_info)) - + # Sort the list in reverse chronological order so that we start with the latest data sorted_list_station_data = sorted(list_station_data, key=lambda x: x[0].time.max(), reverse=True) sorted_stids = [info["stid"] for _, info in sorted_list_station_data] @@ -285,8 +280,6 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me # Assign site id - if not l3_merged: - logger.error('No level 2 data file found for '+site) l3_merged.attrs['site_id'] = site l3_merged.attrs['stations'] = ' '.join(sorted_stids) l3_merged.attrs['level'] = 'L3' diff --git a/src/pypromice/process/write.py b/src/pypromice/process/write.py index e8d9e6a7..cc4d8fe5 100644 --- a/src/pypromice/process/write.py +++ b/src/pypromice/process/write.py @@ -48,7 +48,7 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi if 'gps_lon' in d2.keys(): d2 = reformat_lon(d2) else: - logger.info('%s does not have gps_lon'%name) + logger.info('%s does not have gpd_lon'%name) # Add variable attributes and metadata if vars_df is None: @@ -100,6 
+100,7 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi writeCSV(out_csv, d2, col_names) # Write to netcdf file + col_names = col_names + ['lat', 'lon', 'alt'] writeNC(out_nc, d2, col_names) logger.info(f'Written to {out_csv}') logger.info(f'Written to {out_nc}') @@ -244,24 +245,16 @@ def addMeta(ds, meta): ds : xarray.Dataset Dataset with metadata ''' - if 'lon' in ds.keys(): - # caluclating average coordinates based on the extra/interpolated coords - for v in ['lat','lon','alt']: - ds.attrs[v+'_avg'] = ds[v].mean().item() - # dropping the less accurate standard coordinates given in the - # raw or tx config files - for v in ['latitude','longitude']: - if v in ds.attrs.keys(): - del ds.attrs[v] - elif 'gps_lon' in ds.keys(): - # caluclating average coordinates based on the measured coords (can be gappy) - for v in ['gps_lat','gps_lon','gps_alt']: - ds.attrs[v+'_avg'] = ds[v].mean().item() - # dropping the less accurate standard coordinates given in the - # raw or tx config files - for v in ['latitude','longitude']: - if v in ds.attrs.keys(): - del ds.attrs[v] + if 'gps_lon' in ds.keys(): + ds['lon'] = ds['gps_lon'].mean() + ds['lon'].attrs = ds['gps_lon'].attrs + + ds['lat'] = ds['gps_lat'].mean() + ds['lat'].attrs = ds['gps_lat'].attrs + + ds['alt'] = ds['gps_alt'].mean() + ds['alt'].attrs = ds['gps_alt'].attrs + # Attribute convention for data discovery # https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3 @@ -290,61 +283,19 @@ def addMeta(ds, meta): ds.attrs['date_metadata_modified'] = ds.attrs['date_created'] ds.attrs['processing_level'] = ds.attrs['level'].replace('L','level ') - - if 'lat' in ds.keys(): - lat_min = ds['lat'].min().values - lat_max = ds['lat'].max().values - elif 'gps_lat' in ds.keys(): - lat_min = ds['gps_lat'].min().values - lat_max = ds['gps_lat'].max().values - elif 'latitude' in ds.attrs.keys(): - lat_min = ds.attrs['latitude'] - lat_max = ds.attrs['latitude'] - else: - lat_min =np.nan - lat_max = np.nan - - - if 'lon' in ds.keys(): - lon_min = ds['lon'].min().values - lon_max = ds['lon'].max().values - elif 'gps_lon' in ds.keys(): - lon_min = ds['gps_lon'].min().values - lon_max = ds['gps_lon'].max().values - elif 'longitude' in ds.attrs.keys(): - lon_min = ds.attrs['longitude'] - lon_max = ds.attrs['longitude'] - else: - lon_min =np.nan - lon_max = np.nan - - if 'alt' in ds.keys(): - alt_min = ds['alt'].min().values - alt_max = ds['alt'].max().values - elif 'gps_alt' in ds.keys(): - alt_min = ds['gps_alt'].min().values - alt_max = ds['gps_alt'].max().values - elif 'altitude' in ds.attrs.keys(): - alt_min = ds.attrs['altitude'] - alt_max = ds.attrs['altitude'] - else: - alt_min =np.nan - alt_max = np.nan - ds.attrs['geospatial_bounds'] = "POLYGON((" + \ - f"{lat_min} {lon_min}, " + \ - f"{lat_min} {lon_max}, " + \ - f"{lat_max} {lon_max}, " + \ - f"{lat_max} {lon_min}, " + \ - f"{lat_min} {lon_min}))" - - ds.attrs['geospatial_lat_min'] = str(lat_min) - ds.attrs['geospatial_lat_max'] = str(lat_max) - ds.attrs['geospatial_lon_min'] = str(lon_min) - ds.attrs['geospatial_lon_max'] = str(lon_max) - ds.attrs['geospatial_vertical_min'] = str(alt_min) - ds.attrs['geospatial_vertical_max'] = str(alt_max) - + f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \ + f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \ + f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \ + f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \ + f"{ds['lat'].min().values} {ds['lon'].min().values}))" + + 
ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values) + ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values) + ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values) + ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values) + ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values) + ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values) ds.attrs['geospatial_vertical_positive'] = 'up' ds.attrs['time_coverage_start'] = str(ds['time'][0].values) ds.attrs['time_coverage_end'] = str(ds['time'][-1].values) @@ -436,4 +387,4 @@ def reformat_lon(dataset, exempt=['UWN', 'Roof_GEUS', 'Roof_PROMICE']): if 'gps_lon' not in dataset.keys(): return dataset dataset['gps_lon'] = dataset['gps_lon'] * -1 - return dataset + return dataset \ No newline at end of file diff --git a/src/pypromice/qc/github_data_issues.py b/src/pypromice/qc/github_data_issues.py index 41603ed8..fd4d96e0 100644 --- a/src/pypromice/qc/github_data_issues.py +++ b/src/pypromice/qc/github_data_issues.py @@ -65,10 +65,10 @@ def flagNAN(ds_in, for v in varlist: if v in list(ds.keys()): - logger.debug(f'---> flagging {t0} {t1} {v}') + logger.info(f'---> flagging {t0} {t1} {v}') ds[v] = ds[v].where((ds['time'] < t0) | (ds['time'] > t1)) else: - logger.debug(f'---> could not flag {v} not in dataset') + logger.info(f'---> could not flag {v} not in dataset') return ds @@ -206,14 +206,13 @@ def adjustData(ds, t1 = pd.to_datetime(t1, utc=True).tz_localize(None) index_slice = dict(time=slice(t0, t1)) + if len(ds_out[var].loc[index_slice].time.time) == 0: - logger.info(f'---> {t0} {t1} {var} {func} {val}') logger.info("Time range does not intersect with dataset") continue - else: - logger.debug(f'---> {t0} {t1} {var} {func} {val}') - + logger.info(f'---> {t0} {t1} {var} {func} {val}') + if func == "add": ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val # flagging adjusted values diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py index 963ff786..5f5d55f4 100644 --- a/src/pypromice/qc/persistence.py +++ b/src/pypromice/qc/persistence.py @@ -83,7 +83,7 @@ def persistence_qc( mask = mask & (df[v]<99) n_masked = mask.sum() n_samples = len(mask) - logger.debug( + logger.info( f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples" ) # setting outliers to NaN @@ -96,7 +96,7 @@ def persistence_qc( n_masked = mask.sum() n_samples = len(mask) - logger.debug( + logger.info( f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples" ) # setting outliers to NaN
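
For reference, a minimal sketch of the new GPS post-processing flow introduced in L2toL3.py above: candidate relocations are flagged as large jumps in the daily-median coordinate signal, and each inter-relocation segment is then smoothed with LOWESS from statsmodels. This is an illustrative standalone example, not the pypromice implementation — the synthetic series, helper names and threshold handling are assumptions, and the real find_breaks() additionally keeps at most one break per year while the real piecewise_smoothing_and_interpolation() also extrapolates the first/last 180 days.

    # Sketch of break detection + piecewise LOWESS smoothing (assumed synthetic input)
    import numpy as np
    import pandas as pd
    from statsmodels.nonparametric.smoothers_lowess import lowess

    rng = np.random.default_rng(0)
    time = pd.date_range("2020-01-01", "2022-12-31", freq="D")
    lat = pd.Series(66.0 + np.cumsum(rng.normal(0, 1e-4, time.size)), index=time)
    lat.loc["2021-06-15":] += 0.05          # pretend the station was moved in June 2021

    def find_breaks_sketch(series, alpha):
        """Return May-September dates where the daily change exceeds alpha * std."""
        diff = (series.resample("D").median()
                      .interpolate(method="linear", limit_area="inside")
                      .diff())
        jumps = diff[diff.abs() > alpha * diff.std()].reset_index()
        jumps = jumps[jumps["index"].dt.month.isin([5, 6, 7, 8, 9])]
        return [None] + jumps["index"].to_list() + [None]

    def piecewise_lowess_sketch(series, breaks, frac=1/3):
        """Smooth each inter-relocation segment with LOWESS, then fill internal gaps."""
        pieces = []
        for start, end in zip(breaks[:-1], breaks[1:]):
            seg = series.loc[slice(start, end)].copy()
            fitted = lowess(seg.values, pd.to_numeric(seg.index),
                            frac=frac, it=0, is_sorted=True)
            seg.loc[seg.notnull()] = fitted[:, 1]
            pieces.append(seg.interpolate(method="linear", limit_area="inside"))
        out = pd.concat(pieces)
        return out[~out.index.duplicated(keep="first")]

    breaks = find_breaks_sketch(lat, alpha=6)       # alpha=6 for lat/lon, 8 for gps_alt in the patch
    smoothed = piecewise_lowess_sketch(lat, breaks)
    print("breaks:", [b.date() for b in breaks[1:-1]])

Smoothing each segment separately, as above, avoids blurring the genuine step caused by a relocation, which is the same reason the patched code only concatenates the per-segment LOWESS fits rather than smoothing the full record at once.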
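
Similarly, a minimal sketch of the simplified coordinate handling that write.py adopts in this patch: station-average lat/lon/alt become scalar variables derived from the GPS series, and the ACDD geospatial_* attributes are built from them. The toy dataset is an illustrative assumption; the variable and attribute names mirror addMeta() above.

    # Sketch of the addMeta() coordinate/metadata step (assumed toy dataset)
    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("2023-01-01", periods=48, freq="D")
    ds = xr.Dataset(
        {
            "gps_lat": ("time", np.linspace(66.999, 67.001, time.size)),
            "gps_lon": ("time", np.linspace(-50.001, -49.999, time.size)),
            "gps_alt": ("time", np.full(time.size, 1840.0)),
        },
        coords={"time": time},
    )

    if "gps_lon" in ds.keys():
        for v in ("lat", "lon", "alt"):
            ds[v] = ds["gps_" + v].mean()       # scalar station-average coordinate
            ds[v].attrs = ds["gps_" + v].attrs  # inherit the GPS variable metadata

    lat_min, lat_max = ds["lat"].min().values, ds["lat"].max().values
    lon_min, lon_max = ds["lon"].min().values, ds["lon"].max().values
    ds.attrs["geospatial_bounds"] = (
        f"POLYGON(({lat_min} {lon_min}, {lat_min} {lon_max}, {lat_max} {lon_max}, "
        f"{lat_max} {lon_min}, {lat_min} {lon_min}))"
    )
    ds.attrs["geospatial_lat_min"] = str(lat_min)
    ds.attrs["geospatial_lat_max"] = str(lat_max)
    ds.attrs["geospatial_lon_min"] = str(lon_min)
    ds.attrs["geospatial_lon_max"] = str(lon_max)
    ds.attrs["geospatial_vertical_min"] = str(ds["alt"].min().values)
    ds.attrs["geospatial_vertical_max"] = str(ds["alt"].max().values)
    print(ds.attrs["geospatial_bounds"])

Because lat, lon and alt are now scalar means, their min and max coincide and the bounding polygon collapses to the station's average position; if a true extent over the measured GPS track is wanted, the bounds would need to be computed from gps_lat/gps_lon instead.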