Revert bad merge #272

Closed
1 change: 1 addition & 0 deletions environment.yml
@@ -72,6 +72,7 @@ dependencies:
- setuptools=68.2.2=py38h06a4308_0
- six=1.16.0=pyh6c4a22f_0
- sqlite=3.41.2=h5eee18b_0
- statsmodels=0.13.2=py39h2bbff1b_0
- tbb=2021.8.0=hdb19cb5_0
- threadpoolctl=3.4.0=pyhc1e730c_0
- tk=8.6.12=h1ccaba5_0
2 changes: 1 addition & 1 deletion setup.py
@@ -35,7 +35,7 @@
"pypromice.qc.percentiles": ["thresholds.csv"],
"pypromice.postprocess": ["station_configurations.toml", "positions_seed.csv"],
},
install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0'],
install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0', 'statsmodels==0.13.2'],
# extras_require={'postprocess': ['eccodes','scikit-learn>=1.1.0']},
entry_points={
'console_scripts': [
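The revert pins statsmodels to exactly 0.13.2 in both environment.yml and setup.py, whereas the other requirements use looser bounds. A minimal sketch of how pip interprets the three specifier styles that appear in install_requires, using the packaging library (an assumed extra dependency, for illustration only):

```python
# Sketch only: how the version specifiers used above behave.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

assert Version("0.13.2") in SpecifierSet("==0.13.2")      # exact pin: 0.13.2 only
assert Version("0.13.3") not in SpecifierSet("==0.13.2")
assert Version("1.26.4") in SpecifierSet("~=1.23")        # compatible release: >=1.23, <2.0
assert Version("2.0.0") not in SpecifierSet("~=1.23")
assert Version("1.5.3") in SpecifierSet(">=1.5.0")        # lower bound only
```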
251 changes: 113 additions & 138 deletions src/pypromice/process/L2toL3.py

Large diffs are not rendered by default.

8 changes: 3 additions & 5 deletions src/pypromice/process/get_l2tol3.py
@@ -12,8 +12,6 @@ def parse_arguments_l2tol3(debug_args=None):
parser = ArgumentParser(description="AWS L3 script for processing L3 "+
"data from L2. An hourly, daily and monthly L3 "+
"data product is outputted to the defined output path")
parser.add_argument('-c', '--config_folder', type=str, required=True,
help='Path to folder with sites configuration (TOML) files')
parser.add_argument('-i', '--inpath', type=str, required=True,
help='Path to Level 2 .nc data file')
parser.add_argument('-o', '--outpath', default=None, type=str, required=False,
@@ -26,7 +24,7 @@
args = parser.parse_args(args=debug_args)
return args

def get_l2tol3(config_folder, inpath, outpath, variables, metadata):
def get_l2tol3(inpath, outpath, variables, metadata):
logging.basicConfig(
format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
level=logging.INFO,
Expand All @@ -48,7 +46,7 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata):
l2.attrs['number_of_booms'] = int(l2.attrs['number_of_booms'])

# Perform Level 3 processing
l3 = toL3(l2, config_folder)
l3 = toL3(l2)

# Write Level 3 dataset to file if output directory given
v = getVars(variables)
@@ -61,7 +59,7 @@ def get_l2tol3(config_folder, inpath, outpath, variables, metadata):

def main():
args = parse_arguments_l2tol3()
_ = get_l2tol3(args.config_folder, args.inpath, args.outpath, args.variables, args.metadata)
_ = get_l2tol3(args.inpath, args.outpath, args.variables, args.metadata)

if __name__ == "__main__":
main()
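With toL3 no longer taking a site-configuration folder, the -c/--config_folder flag is dropped from the parser, the function signature, and main. A minimal sketch of calling the reverted function; all file paths are hypothetical:

```python
# Hypothetical paths; the signature matches the reverted get_l2tol3 above.
from pypromice.process.get_l2tol3 import get_l2tol3

l3 = get_l2tol3(
    inpath="level_2/SITE_hour.nc",  # Level 2 input file
    outpath="level_3",              # output directory
    variables="variables.csv",      # variables lookup table
    metadata="metadata.csv",        # metadata lookup table
)
```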
13 changes: 3 additions & 10 deletions src/pypromice/process/join_l3.py
@@ -6,11 +6,6 @@
import numpy as np
import pandas as pd
import xarray as xr
logging.basicConfig(
format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
level=logging.INFO,
stream=sys.stdout,
)
logger = logging.getLogger(__name__)
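Removing the module-level logging.basicConfig means importing join_l3 no longer reconfigures the root logger as a side effect. A sketch of the conventional split, assuming the configuration moves to whichever entry point runs the pipeline:

```python
# Sketch: configure logging once in the application entry point,
# not at library import time.
import logging
import sys

logging.basicConfig(
    format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)

# Library modules then only declare their named logger:
logger = logging.getLogger(__name__)
```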

def parse_arguments_joinl3(debug_args=None):
@@ -207,16 +202,16 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me

filepath = os.path.join(folder_l3, stid, stid+'_hour.nc')
isNead = False
if station_info["project"].lower() in ["historical gc-net"]:
if station_info["project"].lower() in ["historical gc-net", "glaciobasis"]:
filepath = os.path.join(folder_gcnet, stid+'.csv')
isNead = True
if not os.path.isfile(filepath):
if not os.path.isfile(filepath):
logger.info(stid+' is from a project '+folder_l3+' or '+folder_gcnet)
continue

l3, _ = loadArr(filepath, isNead)
list_station_data.append((l3, station_info))

# Sort the list in reverse chronological order so that we start with the latest data
sorted_list_station_data = sorted(list_station_data, key=lambda x: x[0].time.max(), reverse=True)
sorted_stids = [info["stid"] for _, info in sorted_list_station_data]
@@ -285,8 +280,6 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me


# Assign site id
if not l3_merged:
logger.error('No level 2 data file found for '+site)
l3_merged.attrs['site_id'] = site
l3_merged.attrs['stations'] = ' '.join(sorted_stids)
l3_merged.attrs['level'] = 'L3'
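The merge still starts from the station with the most recent data. A toy sketch of that sort key, with invented datasets:

```python
# Toy illustration of the sorted() call in join_l3; data are invented.
import pandas as pd
import xarray as xr

old = xr.Dataset(coords={"time": pd.date_range("2020-01-01", periods=3)})
new = xr.Dataset(coords={"time": pd.date_range("2021-01-01", periods=3)})
list_station_data = [(old, {"stid": "OLD1"}), (new, {"stid": "NEW1"})]

# Latest data first (reverse chronological order)
ordered = sorted(list_station_data, key=lambda x: x[0].time.max(), reverse=True)
assert [info["stid"] for _, info in ordered] == ["NEW1", "OLD1"]
```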
99 changes: 25 additions & 74 deletions src/pypromice/process/write.py
@@ -48,7 +48,7 @@ def prepare_and_write(dataset, outpath, vars_df=None, meta_dict=None, time='60mi
if 'gps_lon' in d2.keys():
d2 = reformat_lon(d2)
else:
logger.info('%s does not have gps_lon'%name)
logger.info('%s does not have gpd_lon'%name)

# Add variable attributes and metadata
if vars_df is None:
@@ -100,6 +100,7 @@
writeCSV(out_csv, d2, col_names)

# Write to netcdf file
col_names = col_names + ['lat', 'lon', 'alt']
writeNC(out_nc, d2, col_names)
logger.info(f'Written to {out_csv}')
logger.info(f'Written to {out_nc}')
@@ -244,24 +245,16 @@ def addMeta(ds, meta):
ds : xarray.Dataset
Dataset with metadata
'''
if 'lon' in ds.keys():
# calculating average coordinates based on the extra/interpolated coords
for v in ['lat','lon','alt']:
ds.attrs[v+'_avg'] = ds[v].mean().item()
# dropping the less accurate standard coordinates given in the
# raw or tx config files
for v in ['latitude','longitude']:
if v in ds.attrs.keys():
del ds.attrs[v]
elif 'gps_lon' in ds.keys():
# calculating average coordinates based on the measured coords (can be gappy)
for v in ['gps_lat','gps_lon','gps_alt']:
ds.attrs[v+'_avg'] = ds[v].mean().item()
# dropping the less accurate standard coordinates given in the
# raw or tx config files
for v in ['latitude','longitude']:
if v in ds.attrs.keys():
del ds.attrs[v]
if 'gps_lon' in ds.keys():
ds['lon'] = ds['gps_lon'].mean()
ds['lon'].attrs = ds['gps_lon'].attrs

ds['lat'] = ds['gps_lat'].mean()
ds['lat'].attrs = ds['gps_lat'].attrs

ds['alt'] = ds['gps_alt'].mean()
ds['alt'].attrs = ds['gps_alt'].attrs

# Attribute convention for data discovery
# https://wiki.esipfed.org/Attribute_Convention_for_Data_Discovery_1-3

@@ -290,61 +283,19 @@ def addMeta(ds, meta):
ds.attrs['date_metadata_modified'] = ds.attrs['date_created']
ds.attrs['processing_level'] = ds.attrs['level'].replace('L','level ')


if 'lat' in ds.keys():
lat_min = ds['lat'].min().values
lat_max = ds['lat'].max().values
elif 'gps_lat' in ds.keys():
lat_min = ds['gps_lat'].min().values
lat_max = ds['gps_lat'].max().values
elif 'latitude' in ds.attrs.keys():
lat_min = ds.attrs['latitude']
lat_max = ds.attrs['latitude']
else:
lat_min =np.nan
lat_max = np.nan


if 'lon' in ds.keys():
lon_min = ds['lon'].min().values
lon_max = ds['lon'].max().values
elif 'gps_lon' in ds.keys():
lon_min = ds['gps_lon'].min().values
lon_max = ds['gps_lon'].max().values
elif 'longitude' in ds.attrs.keys():
lon_min = ds.attrs['longitude']
lon_max = ds.attrs['longitude']
else:
lon_min =np.nan
lon_max = np.nan

if 'alt' in ds.keys():
alt_min = ds['alt'].min().values
alt_max = ds['alt'].max().values
elif 'gps_alt' in ds.keys():
alt_min = ds['gps_alt'].min().values
alt_max = ds['gps_alt'].max().values
elif 'altitude' in ds.attrs.keys():
alt_min = ds.attrs['altitude']
alt_max = ds.attrs['altitude']
else:
alt_min =np.nan
alt_max = np.nan

ds.attrs['geospatial_bounds'] = "POLYGON((" + \
f"{lat_min} {lon_min}, " + \
f"{lat_min} {lon_max}, " + \
f"{lat_max} {lon_max}, " + \
f"{lat_max} {lon_min}, " + \
f"{lat_min} {lon_min}))"

ds.attrs['geospatial_lat_min'] = str(lat_min)
ds.attrs['geospatial_lat_max'] = str(lat_max)
ds.attrs['geospatial_lon_min'] = str(lon_min)
ds.attrs['geospatial_lon_max'] = str(lon_max)
ds.attrs['geospatial_vertical_min'] = str(alt_min)
ds.attrs['geospatial_vertical_max'] = str(alt_max)

f"{ds['lat'].min().values} {ds['lon'].min().values}, " + \
f"{ds['lat'].min().values} {ds['lon'].max().values}, " + \
f"{ds['lat'].max().values} {ds['lon'].max().values}, " + \
f"{ds['lat'].max().values} {ds['lon'].min().values}, " + \
f"{ds['lat'].min().values} {ds['lon'].min().values}))"

ds.attrs['geospatial_lat_min'] = str(ds['lat'].min().values)
ds.attrs['geospatial_lat_max'] = str(ds['lat'].max().values)
ds.attrs['geospatial_lon_min'] = str(ds['lon'].min().values)
ds.attrs['geospatial_lon_max'] = str(ds['lon'].max().values)
ds.attrs['geospatial_vertical_min'] = str(ds['alt'].min().values)
ds.attrs['geospatial_vertical_max'] = str(ds['alt'].max().values)
ds.attrs['geospatial_vertical_positive'] = 'up'
ds.attrs['time_coverage_start'] = str(ds['time'][0].values)
ds.attrs['time_coverage_end'] = str(ds['time'][-1].values)
@@ -436,4 +387,4 @@ def reformat_lon(dataset, exempt=['UWN', 'Roof_GEUS', 'Roof_PROMICE']):
if 'gps_lon' not in dataset.keys():
return dataset
dataset['gps_lon'] = dataset['gps_lon'] * -1
return dataset
return dataset
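The reverted addMeta drops the lat/gps/attribute fallback chains and instead collapses the GPS series into scalar lat, lon, and alt, from which the ACDD geospatial attributes are derived directly. A toy sketch of the resulting behaviour (invented values); note that min and max coincide once the coordinates are scalars:

```python
# Toy sketch of the reverted addMeta coordinate handling; values invented.
import xarray as xr

ds = xr.Dataset(
    {"gps_lat": ("time", [66.9, 67.0, 67.1]),
     "gps_lon": ("time", [-50.1, -50.0, -49.9]),
     "gps_alt": ("time", [1000.0, 1010.0, 1020.0])},
    coords={"time": [0, 1, 2]},
)

# Scalar coordinates from the GPS means, as in the diff above
for name, gps in [("lat", "gps_lat"), ("lon", "gps_lon"), ("alt", "gps_alt")]:
    ds[name] = ds[gps].mean()

# With scalars, min == max == mean for each geospatial attribute
ds.attrs["geospatial_lat_min"] = str(ds["lat"].min().values)
ds.attrs["geospatial_lat_max"] = str(ds["lat"].max().values)
```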
11 changes: 5 additions & 6 deletions src/pypromice/qc/github_data_issues.py
@@ -65,10 +65,10 @@ def flagNAN(ds_in,

for v in varlist:
if v in list(ds.keys()):
logger.debug(f'---> flagging {t0} {t1} {v}')
logger.info(f'---> flagging {t0} {t1} {v}')
ds[v] = ds[v].where((ds['time'] < t0) | (ds['time'] > t1))
else:
logger.debug(f'---> could not flag {v} not in dataset')
logger.info(f'---> could not flag {v} not in dataset')

return ds
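The flagging pattern itself is unchanged: values inside the [t0, t1] window are set to NaN via xarray's where. A toy sketch with an invented variable and dates:

```python
# Toy data; flagNAN reads real flag windows from the data-issues repository.
import pandas as pd
import xarray as xr

time = pd.date_range("2024-01-01", periods=5, freq="D")
ds = xr.Dataset({"t_u": ("time", [1.0, 2.0, 3.0, 4.0, 5.0])}, coords={"time": time})
t0, t1 = pd.Timestamp("2024-01-02"), pd.Timestamp("2024-01-03")

# Keep values outside the window, set the window itself to NaN
ds["t_u"] = ds["t_u"].where((ds["time"] < t0) | (ds["time"] > t1))
```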

@@ -206,14 +201,13 @@ def adjustData(ds,
t1 = pd.to_datetime(t1, utc=True).tz_localize(None)

index_slice = dict(time=slice(t0, t1))

if len(ds_out[var].loc[index_slice].time.time) == 0:
logger.info(f'---> {t0} {t1} {var} {func} {val}')
logger.info("Time range does not intersect with dataset")
continue

else:
logger.debug(f'---> {t0} {t1} {var} {func} {val}')

logger.info(f'---> {t0} {t1} {var} {func} {val}')

if func == "add":
ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
# flagging adjusted values
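adjustData applies the requested function over a label-based time slice. A toy sketch of the "add" branch shown above, with invented data:

```python
# Toy data; the real adjustments come from GitHub-hosted issue files.
import pandas as pd
import xarray as xr

time = pd.date_range("2024-01-01", periods=4, freq="D")
ds_out = xr.Dataset({"wspd_u": ("time", [1.0, 2.0, 3.0, 4.0])}, coords={"time": time})

var, val = "wspd_u", 0.5
index_slice = dict(time=slice("2024-01-02", "2024-01-03"))

# The "add" branch: shift the selected window by val
ds_out[var].loc[index_slice] = ds_out[var].loc[index_slice].values + val
```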
4 changes: 2 additions & 2 deletions src/pypromice/qc/persistence.py
@@ -83,7 +83,7 @@ def persistence_qc(
mask = mask & (df[v]<99)
n_masked = mask.sum()
n_samples = len(mask)
logger.debug(
logger.info(
f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples"
)
# setting outliers to NaN
Expand All @@ -96,7 +96,7 @@ def persistence_qc(

n_masked = mask.sum()
n_samples = len(mask)
logger.debug(
logger.info(
f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples"
)
# setting outliers to NaN
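These hunks only promote the persistence-QC summary from debug to info. A toy sketch of the masking-and-logging step; the stand-in mask below is not the real persistence detector:

```python
# Stand-in mask for illustration; persistence_qc detects repeated values.
import logging
import sys

import numpy as np
import pandas as pd

logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger(__name__)

df = pd.DataFrame({"t_u": [1.0, 1.0, 1.0, 2.0, 99.9]})
v = "t_u"

mask = df[v].diff().eq(0) & (df[v] < 99)  # flatlined and physically plausible
n_masked, n_samples = mask.sum(), len(mask)
logger.info(f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples")
df.loc[mask, v] = np.nan  # setting outliers to NaN
```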