From bf18598e61768def02254598ea21de1970a7f51a Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Thu, 14 Sep 2023 10:42:53 +0200 Subject: [PATCH] Renamed threshold parameters: * diff_period -> period * static_limit -> max_diff --- src/pypromice/qc/static_qc.py | 24 ++++++++++++------------ src/pypromice/qc/static_qc_test.py | 18 +++++++++--------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/pypromice/qc/static_qc.py b/src/pypromice/qc/static_qc.py index 4df7ebd7..4d9c2e59 100644 --- a/src/pypromice/qc/static_qc.py +++ b/src/pypromice/qc/static_qc.py @@ -10,9 +10,9 @@ ] DEFAULT_VARIABLE_THRESHOLDS = { - "t": {"static_limit": 0.001, "diff_period": 1}, - "p": {"static_limit": 0.0001 / 24, "diff_period": 24}, - "rh": {"static_limit": 0.0001 / 24, "diff_period": 24}, + "t": {"max_diff": 0.001, "period": 1}, + "p": {"max_diff": 0.0001 / 24, "period": 24}, + "rh": {"max_diff": 0.0001 / 24, "period": 24}, } @@ -33,7 +33,7 @@ def apply_static_qc( variable_thresholds : Mapping Define threshold dict to hold limit values, and the difference values. Limit values indicate how much a variable has to change to the previous value - diff_period is how many hours a value can stay the same without being set to NaN + period is how many hours a value can stay the same without being set to NaN * are used to calculate and define all limits, which are then applied to *_u, *_l and *_i Returns @@ -58,12 +58,12 @@ def apply_static_qc( k + "_l", k + "_i", ] # apply to upper, lower boom, and instant - static_limit = variable_thresholds[k]["static_limit"] # loading static limit - diff_period = variable_thresholds[k]["diff_period"] # loading diff period + max_diff = variable_thresholds[k]["max_diff"] # loading static limit + period = variable_thresholds[k]["period"] # loading diff period for v in var_all: if v in df: - mask = find_static_regions(df[v], diff_period, static_limit) + mask = find_static_regions(df[v], period, max_diff) # setting outliers to NaN df.loc[mask, v] = np.nan @@ -78,8 +78,8 @@ def apply_static_qc( def find_static_regions( data: pd.Series, - diff_period: int, - static_limit: float, + period: int, + max_diff: float, ) -> pd.Series: """ Algorithm that ensures values can stay the same within the outliers_mask @@ -88,8 +88,8 @@ def find_static_regions( # Indexing is significantly faster on numpy arrays that pandas series diff = np.array(diff) outliers_mask = np.zeros_like(diff, dtype=bool) - for i in range(len(outliers_mask) - diff_period + 1): - i_end = i + diff_period - if max(diff[i:i_end]) < static_limit: + for i in range(len(outliers_mask) - period + 1): + i_end = i + period + if max(diff[i:i_end]) < max_diff: outliers_mask[i:i_end] = True return pd.Series(index=data.index, data=outliers_mask) diff --git a/src/pypromice/qc/static_qc_test.py b/src/pypromice/qc/static_qc_test.py index 82643502..b556f565 100644 --- a/src/pypromice/qc/static_qc_test.py +++ b/src/pypromice/qc/static_qc_test.py @@ -42,7 +42,7 @@ def test_1_hour_static(self): index = 24 series.iloc[index] = series.iloc[index - 1] - mask = find_static_regions(series, diff_period=1, static_limit=0.001) + mask = find_static_regions(series, period=1, max_diff=0.001) self.assertEqual(1, mask.sum()) self.assertTrue(1, mask.iloc[index]) @@ -56,7 +56,7 @@ def test_1_hour_second_index(self): index = 1 series.iloc[index] = series.iloc[index - 1] - mask = find_static_regions(series, diff_period=1, static_limit=0.001) + mask = find_static_regions(series, period=1, max_diff=0.001) self.assertEqual(1, mask.sum()) self.assertTrue(1, mask.iloc[index]) @@ -70,7 +70,7 @@ def test_1_hour_last_index(self): index = -1 series.iloc[index] = series.iloc[index - 1] - mask = find_static_regions(series, diff_period=1, static_limit=0.001) + mask = find_static_regions(series, period=1, max_diff=0.001) self.assertEqual(1, mask.sum()) self.assertTrue(1, mask.iloc[index]) @@ -82,7 +82,7 @@ def test_no_static_period(self): freq="1h", ) - static_mask = find_static_regions(series, diff_period=1, static_limit=0.001) + static_mask = find_static_regions(series, period=1, max_diff=0.001) pd.testing.assert_series_equal( pd.Series(index=static_mask.index, data=False), @@ -90,7 +90,7 @@ def test_no_static_period(self): check_names=False, ) - def test_static_period_longer_than_diff(self): + def test_static_period_longer_than_period_threshold(self): series = get_random_timeseries( start=get_random_datetime(), period=datetime.timedelta(days=100), @@ -101,7 +101,7 @@ def test_static_period_longer_than_diff(self): index_end = index_start + index_length series.iloc[index_start:index_end] = series.iloc[index_start - 1] - static_mask = find_static_regions(series, diff_period=24, static_limit=0.001) + static_mask = find_static_regions(series, period=24, max_diff=0.001) self.assertEqual( index_length, @@ -112,7 +112,7 @@ def test_static_period_longer_than_diff(self): static_mask.iloc[index_start:index_end].sum(), ) - def test_diff_period_longer_than_static_period(self): + def test_period_threshold_longer_than_static_period(self): series = get_random_timeseries( start=get_random_datetime(), period=datetime.timedelta(days=100), @@ -123,7 +123,7 @@ def test_diff_period_longer_than_static_period(self): index_end = index_start + index_length series.iloc[index_start:index_end] = series.iloc[index_start - 1] - static_mask = find_static_regions(series, diff_period=31, static_limit=0.001) + static_mask = find_static_regions(series, period=31, max_diff=0.001) self.assertEqual(0, static_mask.sum()) @@ -136,7 +136,7 @@ def test_static_period_at_the_end(self): index_length = 14 series.iloc[-index_length:] = series.iloc[-index_length - 1] - static_mask = find_static_regions(series, diff_period=10, static_limit=0.001) + static_mask = find_static_regions(series, period=10, max_diff=0.001) self.assertEqual(index_length, static_mask.sum()) self.assertEqual(index_length, static_mask.iloc[-index_length:].sum())