
Commit

Updated and renamed staticQC->apply_static_qc
* Added thresholds as input parameters with default values (see the usage sketch below the changed-files summary)
* Cleaned up commented-out code
* Applied black formatting to the file
ladsmund committed Sep 14, 2023
1 parent 476b891 commit 6eeb2a2
Showing 2 changed files with 38 additions and 24 deletions.
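For orientation, a minimal usage sketch of the renamed entry point follows; the function name, the default t/p/rh thresholds, and the *_u suffix convention come from the diff below, while the synthetic dataset itself is purely illustrative.

import numpy as np
import pandas as pd
import xarray as xr

from pypromice.qc.static_qc import apply_static_qc

# Synthetic hourly Level 1 data: an upper-boom air temperature (t_u) that
# varies for two days and then sticks at a constant value for two more.
time = pd.date_range("2023-06-01", periods=96, freq="H")
t_u = np.concatenate([20 + np.random.randn(48), np.full(48, 20.0)])
ds = xr.Dataset({"t_u": ("time", t_u)}, coords={"time": time})

# Called with no extra arguments, the default t/p/rh thresholds apply,
# exactly as in the toL2() call site changed in L1toL2.py below.
ds_qc = apply_static_qc(ds)

# The stuck stretch should now be NaN while the varying part survives.
print(int(ds_qc["t_u"].isnull().sum()))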
4 changes: 2 additions & 2 deletions src/pypromice/process/L1toL2.py
@@ -9,7 +9,7 @@
import os
import xarray as xr

from pypromice.qc.static_qc import staticQC
from pypromice.qc.static_qc import apply_static_qc
from pypromice.qc.percentile import percentileQC


@@ -56,7 +56,7 @@ def toL2(L1, T_0=273.15, ews=1013.246, ei0=6.1071, eps_overcast=1.,



ds = staticQC(ds) # Flag and Remove difference outliers
ds = apply_static_qc(ds) # Flag and Remove difference outliers
ds = percentileQC(ds) # Flag and remove percentile outliers

T_100 = _getTempK(T_0)
58 changes: 36 additions & 22 deletions src/pypromice/qc/static_qc.py
@@ -1,10 +1,26 @@
import numpy as np
import pandas as pd
import xarray as xr
from typing import Mapping, Optional


def staticQC(ds: xr.Dataset) -> xr.Dataset:
'''
__all__ = [
"apply_static_qc",
"find_static_regions",
]

DEFAULT_VARIABLE_THRESHOLDS = {
"t": {"static_limit": 0.001, "diff_period": 1},
"p": {"static_limit": 0.0001 / 24, "diff_period": 24},
"rh": {"static_limit": 0.0001 / 24, "diff_period": 24},
}


def apply_static_qc(
ds: xr.Dataset,
variable_thresholds: Optional[Mapping] = None,
) -> xr.Dataset:
"""
Detect and filter data points that seem to be static within a certain period.
TODO: It could be nice to have a reference to the logger or description of the behaviour here.
@@ -13,13 +29,18 @@ def staticQC(ds: xr.Dataset) -> xr.Dataset:
Parameters
----------
ds : xr.Dataset
Level 1 dataset
Level 1 dataset
variable_thresholds : Mapping
Threshold dict holding a static_limit and a diff_period for each base variable.
static_limit indicates how much a variable has to change relative to the previous value,
and diff_period is how many hours a value can stay the same without being set to NaN.
The base variable names are used to calculate the limits, which are then applied to *_u, *_l and *_i.
Returns
-------
ds_out : xr.Dataset
Level 1 dataset with difference outliers set to NaN
'''
"""

# The difference QC is not applied to wind speed
# Optionally examine flagged data by setting make_plots to True
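To make the mapping format documented above concrete, a hedged example follows; the key and field names come from the diff, while the dataset ds and the numeric values are assumptions.

from pypromice.qc.static_qc import apply_static_qc

# Passing a mapping replaces the defaults entirely: per the
# `if variable_thresholds is None` guard further down, only the
# variables listed here are checked.
custom_thresholds = {
    "p": {"static_limit": 0.0001 / 24, "diff_period": 24},  # pressure only
}
ds_qc = apply_static_qc(ds, variable_thresholds=custom_thresholds)  # ds: Level 1 xr.Dataset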
@@ -28,22 +49,17 @@ def staticQC(ds: xr.Dataset) -> xr.Dataset:

df = ds.to_dataframe() # Switch to pandas

# Define threshold dict to hold limit values, and the difference values.
# Limit values indicate how much a variable has to change to the previous value
# diff_period is how many hours a value can stay the same without being set to NaN
# * are used to calculate and define all limits, which are then applied to *_u, *_l and *_i

var_threshold = {
't': {'static_limit': 0.001, 'diff_period': 1},
'p': {'static_limit': 0.0001, 'diff_period': 24},
'rh': {'static_limit': 0.0001, 'diff_period': 24}
}
if variable_thresholds is None:
variable_thresholds = DEFAULT_VARIABLE_THRESHOLDS

for k in var_threshold.keys():

var_all = [k + '_u', k + '_l', k + '_i'] # apply to upper, lower boom, and instant
static_limit = var_threshold[k]['static_limit'] # loading static limit
diff_period = var_threshold[k]['diff_period'] # loading diff period
for k in variable_thresholds.keys():
var_all = [
k + "_u",
k + "_l",
k + "_i",
] # apply to upper, lower boom, and instant
static_limit = variable_thresholds[k]["static_limit"] # loading static limit
diff_period = variable_thresholds[k]["diff_period"] # loading diff period

for v in var_all:
if v in df:
@@ -56,9 +72,7 @@ def staticQC(ds: xr.Dataset) -> xr.Dataset:
ds_out = ds_out.assign_attrs(ds.attrs) # Dataset attrs
for x in ds_out.data_vars: # variable-specific attrs
ds_out[x].attrs = ds[x].attrs
# equivalent to above:
# vals = [xr.DataArray(data=df_out[c], dims=['time'], coords={'time':df_out.index}, attrs=ds[c].attrs) for c in df_out.columns]
# ds_out = xr.Dataset(dict(zip(df_out.columns, vals)), attrs=ds.attrs)

return ds_out


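The body of the exported find_static_regions helper is collapsed in this view. Purely as a hedged sketch of the behaviour the docstring describes (values that change by less than static_limit over diff_period hours are treated as static and set to NaN), one possible standalone detector is shown below; it is not the collapsed pypromice implementation, and the helper name, signature, and unit handling are assumptions.

import pandas as pd


def find_static_regions_sketch(
    data: pd.Series, static_limit: float, diff_period: int
) -> pd.Series:
    """Return a boolean mask of samples whose change over diff_period
    steps stays below static_limit (hourly samples assumed)."""
    diff = data.diff(periods=diff_period).abs()
    return diff < static_limit


# Illustrative use on an hourly pressure column of a Level 1 dataframe:
# mask = find_static_regions_sketch(df["p_u"], static_limit=0.0001 / 24, diff_period=24)
# df.loc[mask, "p_u"] = float("nan")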
