diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index d3272759..3fe042b1 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -9,7 +9,7 @@ import os import xarray as xr -from pypromice.qc.static_qc import staticQC +from pypromice.qc.static_qc import apply_static_qc from pypromice.qc.percentile import percentileQC @@ -56,7 +56,7 @@ def toL2(L1, T_0=273.15, ews=1013.246, ei0=6.1071, eps_overcast=1., - ds = staticQC(ds) # Flag and Remove difference outliers + ds = apply_static_qc(ds) # Flag and Remove difference outliers ds = percentileQC(ds) # Flag and remove percentile outliers T_100 = _getTempK(T_0) diff --git a/src/pypromice/qc/static_qc.py b/src/pypromice/qc/static_qc.py index 5d956190..4df7ebd7 100644 --- a/src/pypromice/qc/static_qc.py +++ b/src/pypromice/qc/static_qc.py @@ -1,10 +1,26 @@ import numpy as np import pandas as pd import xarray as xr +from typing import Mapping, Optional -def staticQC(ds: xr.Dataset) -> xr.Dataset: - ''' +__all__ = [ + "apply_static_qc", + "find_static_regions", +] + +DEFAULT_VARIABLE_THRESHOLDS = { + "t": {"static_limit": 0.001, "diff_period": 1}, + "p": {"static_limit": 0.0001 / 24, "diff_period": 24}, + "rh": {"static_limit": 0.0001 / 24, "diff_period": 24}, +} + + +def apply_static_qc( + ds: xr.Dataset, + variable_thresholds: Optional[Mapping] = None, +) -> xr.Dataset: + """ Detect and filter data points that sems to be static within a certain period. TODO: It could be nice to have a reference to the logger or description of the behaviour here. @@ -13,13 +29,18 @@ def staticQC(ds: xr.Dataset) -> xr.Dataset: Parameters ---------- ds : xr.Dataset - Level 1 datset + Level 1 datset + variable_thresholds : Mapping + Define threshold dict to hold limit values, and the difference values. + Limit values indicate how much a variable has to change to the previous value + diff_period is how many hours a value can stay the same without being set to NaN + * are used to calculate and define all limits, which are then applied to *_u, *_l and *_i Returns ------- ds_out : xr.Dataset Level 1 dataset with difference outliers set to NaN - ''' + """ # the differenceQC is not done on the Windspeed # Optionally examine flagged data by setting make_plots to True @@ -28,22 +49,17 @@ def staticQC(ds: xr.Dataset) -> xr.Dataset: df = ds.to_dataframe() # Switch to pandas - # Define threshold dict to hold limit values, and the difference values. - # Limit values indicate how much a variable has to change to the previous value - # diff_period is how many hours a value can stay the same without being set to NaN - # * are used to calculate and define all limits, which are then applied to *_u, *_l and *_i - - var_threshold = { - 't': {'static_limit': 0.001, 'diff_period': 1}, - 'p': {'static_limit': 0.0001, 'diff_period': 24}, - 'rh': {'static_limit': 0.0001, 'diff_period': 24} - } + if variable_thresholds is None: + variable_thresholds = DEFAULT_VARIABLE_THRESHOLDS - for k in var_threshold.keys(): - - var_all = [k + '_u', k + '_l', k + '_i'] # apply to upper, lower boom, and instant - static_limit = var_threshold[k]['static_limit'] # loading static limit - diff_period = var_threshold[k]['diff_period'] # loading diff period + for k in variable_thresholds.keys(): + var_all = [ + k + "_u", + k + "_l", + k + "_i", + ] # apply to upper, lower boom, and instant + static_limit = variable_thresholds[k]["static_limit"] # loading static limit + diff_period = variable_thresholds[k]["diff_period"] # loading diff period for v in var_all: if v in df: @@ -56,9 +72,7 @@ def staticQC(ds: xr.Dataset) -> xr.Dataset: ds_out = ds_out.assign_attrs(ds.attrs) # Dataset attrs for x in ds_out.data_vars: # variable-specific attrs ds_out[x].attrs = ds[x].attrs - # equivalent to above: - # vals = [xr.DataArray(data=df_out[c], dims=['time'], coords={'time':df_out.index}, attrs=ds[c].attrs) for c in df_out.columns] - # ds_out = xr.Dataset(dict(zip(df_out.columns, vals)), attrs=ds.attrs) + return ds_out