Revert "Feature/smoothing and extrapolating gps coordinates (#271)"
This reverts commit e77de0a.
BaptisteVandecrux authored Jul 1, 2024
1 parent e77de0a commit d927f25
Showing 8 changed files with 153 additions and 236 deletions.
1 change: 1 addition & 0 deletions environment.yml
@@ -72,6 +72,7 @@ dependencies:
- setuptools=68.2.2=py38h06a4308_0
- six=1.16.0=pyh6c4a22f_0
- sqlite=3.41.2=h5eee18b_0
- statsmodels=0.13.2=py39h2bbff1b_0
- tbb=2021.8.0=hdb19cb5_0
- threadpoolctl=3.4.0=pyhc1e730c_0
- tk=8.6.12=h1ccaba5_0
2 changes: 1 addition & 1 deletion setup.py
@@ -35,7 +35,7 @@
"pypromice.qc.percentiles": ["thresholds.csv"],
"pypromice.postprocess": ["station_configurations.toml", "positions_seed.csv"],
},
install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0'],
install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0', 'statsmodels==0.13.2'],
# extras_require={'postprocess': ['eccodes','scikit-learn>=1.1.0']},
entry_points={
'console_scripts': [
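Both files above re-pin statsmodels==0.13.2, which suggests (though the diff alone does not prove) that the restored L2-to-L3 code path relies on statsmodels for the GPS smoothing. A minimal, hypothetical sketch of LOWESS-smoothing a gappy GPS series with that library — illustrative names only, not the project's implementation:

```python
# Sketch only: LOWESS-smooth a time-indexed, gappy coordinate series.
# Assumes the series has a DatetimeIndex; frac is an illustrative default.
import pandas as pd
import statsmodels.api as sm

def smooth_gps(series: pd.Series, frac: float = 0.3) -> pd.Series:
    valid = series.dropna()                            # lowess cannot take NaNs here
    x = valid.index.astype('int64').to_numpy(dtype=float)  # ns since epoch
    smoothed = sm.nonparametric.lowess(valid.values, x, frac=frac,
                                       return_sorted=False)
    return pd.Series(smoothed, index=valid.index)

# e.g. smooth_gps(df['gps_lat']) on an hourly station record
```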
251 changes: 113 additions & 138 deletions src/pypromice/process/L2toL3.py

Large diffs are not rendered by default.
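This unrendered file is where most of the revert lands: toL3 returns to its single-argument signature (see get_l2tol3.py below) and the #271 handling of smoothed and extrapolated GPS coordinates is undone. As a rough sketch of what extrapolating GPS coordinates can look like in general — an assumption, not code from either side of this diff — one can fit a linear drift to the valid fixes and evaluate it over the full time axis:

```python
# Hypothetical illustration: fit a linear drift to valid GPS fixes and
# evaluate it everywhere, filling gaps and extending past the last fix.
import numpy as np
import pandas as pd

def extrapolate_coord(series: pd.Series) -> pd.Series:
    valid = series.dropna()
    if len(valid) < 2:
        return series                                  # not enough fixes to fit
    t_valid = valid.index.astype('int64').to_numpy(dtype=float)
    slope, intercept = np.polyfit(t_valid, valid.values, 1)
    t_all = series.index.astype('int64').to_numpy(dtype=float)
    return pd.Series(slope * t_all + intercept, index=series.index)
```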

8 changes: 3 additions & 5 deletions src/pypromice/process/get_l2tol3.py
@@ -12,8 +12,6 @@ def parse_arguments_l2tol3(debug_args=None):
parser = ArgumentParser(description="AWS L3 script for processing L3 "+
"data from L2. An hourly, daily and monthly L3 "+
"data product is outputted to the defined output path")
parser.add_argument('-c', '--config_folder', type=str, required=True,
help='Path to folder with sites configuration (TOML) files')
parser.add_argument('-i', '--inpath', type=str, required=True,
help='Path to Level 2 .nc data file')
parser.add_argument('-o', '--outpath', default=None, type=str, required=False,
@@ -26,7 +24,7 @@ def parse_arguments_l2tol3(debug_args=None):
args = parser.parse_args(args=debug_args)
return args

def get_l2tol3(config_folder, inpath, outpath, variables, metadata):
def get_l2tol3(inpath, outpath, variables, metadata):
logging.basicConfig(
format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
level=logging.INFO,
@@ -48,7 +46,7 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata):
l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms'])

# Perform Level 3 processing
l3 = toL3(l2, config_folder)
l3 = toL3(l2)

# Write Level 3 dataset to file if output directory given
v = getVars(variables)
@@ -61,7 +59,7 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata):

def main():
args = parse_arguments_l2tol3()
_ = get_l2tol3(args.config_folder, args.inpath, args.outpath, args.variables, args.metadata)
_ = get_l2tol3(args.inpath, args.outpath, args.variables, args.metadata)

if __name__ == "__main__":
main()
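With the config_folder argument reverted away, the function is driven entirely by the L2 input file. A minimal usage sketch; the paths are placeholders, and the variables/metadata arguments are assumed to be the lookup files that getVars()/getMeta() read (not shown in this diff):

```python
# Post-revert usage sketch; all paths are illustrative placeholders.
from pypromice.process.get_l2tol3 import get_l2tol3

l3 = get_l2tol3(inpath='aws-l2/KAN_U/KAN_U_hour.nc',
                outpath='aws-l3/',
                variables='variables.csv',   # assumed getVars() input
                metadata='metadata.csv')     # assumed getMeta() input
```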
13 changes: 3 additions & 10 deletions src/pypromice/process/join_l3.py
@@ -6,11 +6,6 @@
import numpy as np
import pandas as pd
import xarray as xr
logging.basicConfig(
format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
level=logging.INFO,
stream=sys.stdout,
)
logger = logging.getLogger(__name__)

def parse_arguments_joinl3(debug_args=None):
@@ -207,16 +202,16 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me

filepath = os.path.join(folder_l3, stid, stid+'_hour.nc')
isNead = False
if station_info["project"].lower() in ["historical gc-net"]:
if station_info["project"].lower() in ["historical gc-net", "glaciobasis"]:
filepath = os.path.join(folder_gcnet, stid+'.csv')
isNead = True
if not os.path.isfile(filepath):
if not os.path.isfile(filepath):
logger.info(stid+' is from a project '+folder_l3+' or '+folder_gcnet)
continue

l3, _ = loadArr(filepath, isNead)
list_station_data.append((l3, station_info))

# Sort the list in reverse chronological order so that we start with the latest data
sorted_list_station_data = sorted(list_station_data, key=lambda x: x[0].time.max(), reverse=True)
sorted_stids = [info["stid"] for _, info in sorted_list_station_data]
@@ -285,8 +280,6 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me


# Assign site id
if not l3_merged:
logger.error('No level 2 data file found for '+site)
l3_merged.attrs['site_id'] = site
l3_merged.attrs['stations'] = ' '.join(sorted_stids)
l3_merged.attrs['level'] = 'L3'
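For context, the sort kept above orders the per-station datasets by their last timestamp so that merging starts from the most recent record. A toy illustration of that ordering:

```python
# Toy stand-ins for the loaded station files; station IDs are invented.
import numpy as np
import pandas as pd
import xarray as xr

def make_ds(end):
    time = pd.date_range('2020-01-01', end, freq='D')
    return xr.Dataset({'t_u': ('time', np.zeros(len(time)))},
                      coords={'time': time})

list_station_data = [
    (make_ds('2021-06-30'), {'stid': 'NUK_L'}),
    (make_ds('2023-01-15'), {'stid': 'NUK_U'}),
]
sorted_list = sorted(list_station_data,
                     key=lambda x: x[0].time.max(), reverse=True)
print([info['stid'] for _, info in sorted_list])  # ['NUK_U', 'NUK_L']
```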
99 changes: 25 additions & 74 deletions src/pypromice/process/write.py
@@ -48,7 +48,7 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
if 'gps_lon' in d2.keys():
d2 = reformat_lon(d2)
else:
logger.info('%s does not have gps_lon'%name)
logger.info('%s does not have gpd_lon'%name)

# Add variable attributes and metadata
if vars_df is None:
@@ -100,6 +100,7 @@
writeCSV(out_csv, d2, col_names)

# Write to netcdf file
col_names = col_names + ['lat', 'lon', 'alt']
writeNC(out_nc, d2, col_names)
logger.info(f'Written to {out_csv}')
logger.info(f'Written to {out_nc}')
@@ -244,24 +245,16 @@ def addMeta(ds, meta):
ds : xarray.Dataset
Dataset with metadata
'''
if 'lon' in ds.keys():
# calculating average coordinates based on the extra/interpolated coords
for v in ['lat','lon','alt']:
ds.attrs[v+'_avg'] = ds[v].mean().item()
# dropping the less accurate standard coordinates given in the
# raw or tx config files
for v in ['latitude','longitude']:
if v in ds.attrs.keys():
del ds.attrs[v]
elif 'gps_lon' in ds.keys():
# calculating average coordinates based on the measured coords (can be gappy)
for v in ['gps_lat','gps_lon','gps_alt']:
ds.attrs[v+'_avg'] = ds[v].mean().item()
# dropping the less accurate standard coordinates given in the
# raw or tx config files
for v in ['latitude','longitude']:
if v in ds.attrs.keys():
del ds.attrs[v]
if 'gps_lon' in ds.keys():
ds['lon'] = ds['gps_lon'].mean()
ds['lon'].attrs = ds['gps_lon'].attrs

ds['lat'] = ds['gps_lat'].mean()
ds['lat'].attrs = ds['gps_lat'].attrs

ds['alt'] = ds['gps_alt'].mean()
ds['alt'].attrs = ds['gps_alt'].attrs

# Attribute convention for data discovery
# https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3

@@ -290,61 +283,19 @@ def addMeta(ds, meta):
ds.attrs['date_metadata_modified'] = ds.attrs['date_created']
ds.attrs['processing_level'] = ds.attrs['level'].replace('L','level ')


if 'lat' in ds.keys():
lat_min = ds['lat'].min().values
lat_max = ds['lat'].max().values
elif 'gps_lat' in ds.keys():
lat_min = ds['gps_lat'].min().values
lat_max = ds['gps_lat'].max().values
elif 'latitude' in ds.attrs.keys():
lat_min = ds.attrs['latitude']
lat_max = ds.attrs['latitude']
else:
lat_min =np.nan
lat_max = np.nan


if 'lon' in ds.keys():
lon_min = ds['lon'].min().values
lon_max = ds['lon'].max().values
elif 'gps_lon' in ds.keys():
lon_min = ds['gps_lon'].min().values
lon_max = ds['gps_lon'].max().values
elif 'longitude' in ds.attrs.keys():
lon_min = ds.attrs['longitude']
lon_max = ds.attrs['longitude']
else:
lon_min =np.nan
lon_max = np.nan

if 'alt' in ds.keys():
alt_min = ds['alt'].min().values
alt_max = ds['alt'].max().values
elif 'gps_alt' in ds.keys():
alt_min = ds['gps_alt'].min().values
alt_max = ds['gps_alt'].max().values
elif 'altitude' in ds.attrs.keys():
alt_min = ds.attrs['altitude']
alt_max = ds.attrs['altitude']
else:
alt_min =np.nan
alt_max = np.nan

ds.attrs['geospatial_bounds'] = "POLYGON((" + \
f"{lat_min} {lon_min}, " + \
f"{lat_min} {lon_max}, " + \
f"{lat_max} {lon_max}, " + \
f"{lat_max} {lon_min}, " + \
f"{lat_min} {lon_min}))"

ds.attrs['geospatial_lat_min'] = str(lat_min)
ds.attrs['geospatial_lat_max'] = str(lat_max)
ds.attrs['geospatial_lon_min'] = str(lon_min)
ds.attrs['geospatial_lon_max'] = str(lon_max)
ds.attrs['geospatial_vertical_min'] = str(alt_min)
ds.attrs['geospatial_vertical_max'] = str(alt_max)

f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \
f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \
f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \
f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \
f"{ds['lat'].min().values} {ds['lon'].min().values}))"

ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values)
ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values)
ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values)
ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values)
ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values)
ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values)
ds.attrs['geospatial_vertical_positive'] = 'up'
ds.attrs['time_coverage_start'] = str(ds['time'][0].values)
ds.attrs['time_coverage_end'] = str(ds['time'][-1].values)
@@ -436,4 +387,4 @@ def reformat_lon(dataset, exempt=['UWN', 'Roof_GEUS', 'Roof_PROMICE']):
if 'gps_lon' not in dataset.keys():
return dataset
dataset['gps_lon'] = dataset['gps_lon'] * -1
return dataset
return dataset
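The restored addMeta collapses the station position to scalar means of the (possibly gappy) gps_* variables and then derives the geospatial_* attributes directly from lat/lon/alt, as shown above. A condensed, runnable sketch with a toy dataset:

```python
# Toy dataset; values are illustrative, the pattern mirrors the diff above.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2022-01-01', periods=24, freq='h')
ds = xr.Dataset(
    {'gps_lat': ('time', np.linspace(66.99, 67.01, 24)),
     'gps_lon': ('time', np.linspace(-50.01, -49.99, 24)),
     'gps_alt': ('time', np.full(24, 1840.0))},
    coords={'time': time})

# scalar position from the GPS means, keeping the GPS variable attributes
for short, gps in [('lat', 'gps_lat'), ('lon', 'gps_lon'), ('alt', 'gps_alt')]:
    ds[short] = ds[gps].mean()
    ds[short].attrs = ds[gps].attrs

# bounds attributes taken straight from the derived variables
ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values)
ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values)
ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values)
ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values)
```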
11 changes: 5 additions & 6 deletions src/pypromice/qc/github_data_issues.py
@@ -65,10 +65,10 @@ def flagNAN(ds_in,

for v in varlist:
if v in list(ds.keys()):
logger.debug(f'---> flagging {t0} {t1} {v}')
logger.info(f'---> flagging {t0} {t1} {v}')
ds[v] = ds[v].where((ds['time'] < t0) | (ds['time'] > t1))
else:
logger.debug(f'---> could not flag {v} not in dataset')
logger.info(f'---> could not flag {v} not in dataset')

return ds

@@ -206,14 +206,13 @@ def adjustData(ds,
t1 = pd.to_datetime(t1, utc=True).tz_localize(None)

index_slice = dict(time=slice(t0, t1))

if len(ds_out[var].loc[index_slice].time.time) == 0:
logger.info(f'---> {t0} {t1} {var} {func} {val}')
logger.info("Time range does not intersect with dataset")
continue

else:
logger.debug(f'---> {t0} {t1} {var} {func} {val}')

logger.info(f'---> {t0} {t1} {var} {func} {val}')

if func == "add":
ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
# flagging adjusted values
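The adjustData hunk above centres on the label-based time slice used to apply a correction. A self-contained sketch of the "add" branch on toy data (the real function also flags the adjusted values):

```python
# Shift a variable by a constant over [t0, t1] via a label-based time slice.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2023-05-01', periods=10, freq='D')
ds_out = xr.Dataset({'t_u': ('time', np.zeros(10))}, coords={'time': time})

t0, t1, var, val = '2023-05-03', '2023-05-06', 't_u', 2.5
index_slice = dict(time=slice(pd.to_datetime(t0), pd.to_datetime(t1)))
ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
print(ds_out[var].values)  # 2.5 inside [t0, t1], 0 elsewhere
```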
4 changes: 2 additions & 2 deletions src/pypromice/qc/persistence.py
@@ -83,7 +83,7 @@ def persistence_qc(
mask = mask & (df[v]<99)
n_masked = mask.sum()
n_samples = len(mask)
logger.debug(
logger.info(
f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples"
)
# setting outliers to NaN
@@ -96,7 +96,7 @@

n_masked = mask.sum()
n_samples = len(mask)
logger.debug(
logger.info(
f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples"
)
# setting outliers to NaN
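Only the logging around the persistence filter is touched here; the mask itself is not shown. As an assumption-labelled illustration of what a persistence mask typically does — flag samples whose value has not changed for several consecutive steps:

```python
# Illustrative persistence mask (an assumption about the QC logic, not the
# project's implementation): flag runs of identical consecutive values.
import pandas as pd

def persistence_mask(series: pd.Series, min_repeats: int = 6) -> pd.Series:
    unchanged = series.diff() == 0
    run = unchanged.groupby((~unchanged).cumsum()).cumsum()  # run lengths
    mask = run >= min_repeats
    print(f"Filtering {int(mask.sum())}/{len(mask)} samples")
    return mask

s = pd.Series([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0])
bad = persistence_mask(s, min_repeats=3)
s_clean = s.where(~bad)  # persistent stretch set to NaN
```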
