Skip to content

Commit

Permalink
printing nan fields in L2, list old variables to drop on join_l3
Browse files Browse the repository at this point in the history
fixed warning in join_l3
  • Loading branch information
BaptisteVandecrux committed Jul 2, 2024
1 parent 16fdf84 commit 1bfe305
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 28 deletions.
16 changes: 10 additions & 6 deletions src/pypromice/process/L2toL3.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
import numpy as np
import xarray as xr
import toml
import toml, os
from sklearn.linear_model import LinearRegression

import logging
Expand Down Expand Up @@ -128,11 +128,15 @@ def gpsCoordinatePostprocessing(ds, var, config_folder='../aws-l0/metadata/stati
# fetching the station relocation dates at which the coordinates will/should
# have a break
config_file = config_folder +"/" + ds.attrs['station_id'] + ".toml"
with open(config_file, "r") as f:
config_data = toml.load(f)

# Extract station relocations from the TOML data
station_relocations = config_data.get("station_relocation", [])
if os.path.isfile(config_file):
with open(config_file, "r") as f:
config_data = toml.load(f)

# Extract station relocations from the TOML data
station_relocations = config_data.get("station_relocation", [])
else:
station_relocations = []
logger.warning('Did not find config file for '+ds.attrs['station_id']+'. Assuming no station relocation.')

# Convert the ISO8601 strings to pandas datetime objects
breaks = [pd.to_datetime(date_str) for date_str in station_relocations]
Expand Down
1 change: 1 addition & 0 deletions src/pypromice/process/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def getL1(self):
logger.info('Level 1 processing...')
self.L0 = [utilities.addBasicMeta(item, self.vars) for item in self.L0]
self.L1 = [toL1(item, self.vars) for item in self.L0]
self.L1.reverse()
self.L1A = reduce(xr.Dataset.combine_first, self.L1)

def getL2(self):
Expand Down
42 changes: 25 additions & 17 deletions src/pypromice/process/join_l3.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,20 @@ def readNead(infile):
# combining thermocouple and CS100 temperatures
ds['TA1'] = ds['TA1'].combine_first(ds['TA3'])
ds['TA2'] = ds['TA2'].combine_first(ds['TA4'])

ds=ds.rename(var_name)

standard_vars_to_drop = ["NR", "TA3", "TA4", "TA5", "NR_cor",
"z_surf_1", "z_surf_2", "z_surf_combined",
"TA2m", "RH2m", "VW10m", "SZA", "SAA",
"depth_t_i_1", "depth_t_i_2", "depth_t_i_3", "depth_t_i_4", "depth_t_i_5",
"depth_t_i_6", "depth_t_i_7", "depth_t_i_8", "depth_t_i_9", "depth_t_i_10", "t_i_10m"
]
standard_vars_to_drop = standard_vars_to_drop + [v for v in list(ds.keys()) if v.endswith("_adj_flag")]

# Drop the variables if they are present in the dataset
ds = ds.drop_vars([var for var in standard_vars_to_drop if var in ds])

ds=ds.rename({'timestamp':'time'})
return ds

Expand All @@ -121,7 +133,8 @@ def loadArr(infile, isNead):
ds = xr.Dataset.from_dataframe(df)

elif infile.split('.')[-1].lower() in 'nc':
ds = xr.open_dataset(infile)
with xr.open_dataset(infile) as ds:
ds.load()
# Remove encoding attributes from NetCDF
for varname in ds.variables:
if ds[varname].encoding!={}:
Expand Down Expand Up @@ -211,10 +224,17 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
filepath = os.path.join(folder_gcnet, stid+'.csv')
isNead = True
if not os.path.isfile(filepath):
logger.info(stid+' is from an project '+folder_l3+' or '+folder_gcnet)
logger.info(stid+' was listed as station but could not be found in '+folder_l3+' nor '+folder_gcnet)
continue

l3, _ = loadArr(filepath, isNead)
l3, _ = loadArr(filepath, isNead)

# removing specific variable from a given file
specific_vars_to_drop = station_info.get("skipped_variables",[])
if len(specific_vars_to_drop)>0:
logger.info("Skipping %s from %s"%(specific_vars_to_drop, stid))
l3 = l3.drop_vars([var for var in specific_vars_to_drop if var in l3])

list_station_data.append((l3, station_info))

# Sort the list in reverse chronological order so that we start with the latest data
Expand Down Expand Up @@ -251,19 +271,7 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
for v in l3_merged.data_vars:
if v not in l3.data_vars:
l3[v] = l3.t_u*np.nan

# if l3 (older data) has variables that l3_merged (newer data) does not have,
# then they are removed from l3
list_dropped = []
for v in l3.data_vars:
if v not in l3_merged.data_vars:
if v != 'z_stake':
list_dropped.append(v)
l3 = l3.drop(v)
else:
l3_merged[v] = ('time', l3_merged.t_u.data*np.nan)
logger.info('Unused variables in older dataset: '+' '.join(list_dropped))


# saving attributes of station under an attribute called $stid
st_attrs = l3_merged.attrs.get('stations_attributes', {})
st_attrs[stid] = l3.attrs.copy()
Expand Down
12 changes: 10 additions & 2 deletions src/pypromice/process/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
d2 = roundValues(d2, vars_df)

# Get variable names to write out
col_names = getColNames(vars_df, d2, remove_nan_fields=True)
if 'site_id' in d2.attrs.keys():
remove_nan_fields = True
else:
remove_nan_fields = False
col_names = getColNames(vars_df, d2, remove_nan_fields=remove_nan_fields)

# Define filename based on resample rate
t = int(pd.Timedelta((d2['time'][1] - d2['time'][0]).values).total_seconds())
Expand Down Expand Up @@ -256,12 +260,16 @@ def addMeta(ds, meta):
elif 'gps_lon' in ds.keys():
# calculating average coordinates based on the measured coords (can be gappy)
for v in ['gps_lat','gps_lon','gps_alt']:
ds.attrs[v+'_avg'] = ds[v].mean().item()
if v in ds.keys():
ds.attrs[v+'_avg'] = ds[v].mean().item()
else:
ds.attrs[v+'_avg'] = np.nan
# dropping the less accurate standard coordinates given in the
# raw or tx config files
for v in ['latitude','longitude']:
if v in ds.attrs.keys():
del ds.attrs[v]

# Attribute convention for data discovery
# https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3

Expand Down
6 changes: 3 additions & 3 deletions src/pypromice/resources/variable_aliases_GC-Net.csv
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ t_i_11,
tilt_x,
tilt_y,
rot,
gps_lat,latitude
gps_lon,longitude
gps_alt,elevation
lat,latitude
lon,longitude
alt,elevation
gps_time,
gps_geounit,
gps_hdop,
Expand Down

0 comments on commit 1bfe305

Please sign in to comment.