Skip to content

Commit

Permalink
Renamed threshold parameters:
Browse files Browse the repository at this point in the history
* diff_period -> period
* static_limit -> max_diff
  • Loading branch information
ladsmund committed Sep 14, 2023
1 parent 6eeb2a2 commit bf18598
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 21 deletions.
24 changes: 12 additions & 12 deletions src/pypromice/qc/static_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
]

DEFAULT_VARIABLE_THRESHOLDS = {
"t": {"static_limit": 0.001, "diff_period": 1},
"p": {"static_limit": 0.0001 / 24, "diff_period": 24},
"rh": {"static_limit": 0.0001 / 24, "diff_period": 24},
"t": {"max_diff": 0.001, "period": 1},
"p": {"max_diff": 0.0001 / 24, "period": 24},
"rh": {"max_diff": 0.0001 / 24, "period": 24},
}


Expand All @@ -33,7 +33,7 @@ def apply_static_qc(
variable_thresholds : Mapping
Define threshold dict to hold limit values, and the difference values.
Limit values indicate how much a variable has to change relative to the previous value
diff_period is how many hours a value can stay the same without being set to NaN
period is how many hours a value can stay the same without being set to NaN
The keys (denoted *) are used to calculate and define all limits, which are then applied to *_u, *_l and *_i
Returns
Expand All @@ -58,12 +58,12 @@ def apply_static_qc(
k + "_l",
k + "_i",
] # apply to upper, lower boom, and instant
static_limit = variable_thresholds[k]["static_limit"] # loading static limit
diff_period = variable_thresholds[k]["diff_period"] # loading diff period
max_diff = variable_thresholds[k]["max_diff"] # loading static limit
period = variable_thresholds[k]["period"] # loading diff period

for v in var_all:
if v in df:
mask = find_static_regions(df[v], diff_period, static_limit)
mask = find_static_regions(df[v], period, max_diff)
# setting outliers to NaN
df.loc[mask, v] = np.nan

Expand All @@ -78,8 +78,8 @@ def apply_static_qc(

def find_static_regions(
data: pd.Series,
diff_period: int,
static_limit: float,
period: int,
max_diff: float,
) -> pd.Series:
"""
Algorithm that ensures values can stay the same within the outliers_mask
Expand All @@ -88,8 +88,8 @@ def find_static_regions(
# Indexing is significantly faster on numpy arrays that pandas series
diff = np.array(diff)
outliers_mask = np.zeros_like(diff, dtype=bool)
for i in range(len(outliers_mask) - diff_period + 1):
i_end = i + diff_period
if max(diff[i:i_end]) < static_limit:
for i in range(len(outliers_mask) - period + 1):
i_end = i + period
if max(diff[i:i_end]) < max_diff:
outliers_mask[i:i_end] = True
return pd.Series(index=data.index, data=outliers_mask)
18 changes: 9 additions & 9 deletions src/pypromice/qc/static_qc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_1_hour_static(self):
index = 24
series.iloc[index] = series.iloc[index - 1]

mask = find_static_regions(series, diff_period=1, static_limit=0.001)
mask = find_static_regions(series, period=1, max_diff=0.001)

self.assertEqual(1, mask.sum())
self.assertTrue(1, mask.iloc[index])
Expand All @@ -56,7 +56,7 @@ def test_1_hour_second_index(self):
index = 1
series.iloc[index] = series.iloc[index - 1]

mask = find_static_regions(series, diff_period=1, static_limit=0.001)
mask = find_static_regions(series, period=1, max_diff=0.001)

self.assertEqual(1, mask.sum())
self.assertTrue(1, mask.iloc[index])
Expand All @@ -70,7 +70,7 @@ def test_1_hour_last_index(self):
index = -1
series.iloc[index] = series.iloc[index - 1]

mask = find_static_regions(series, diff_period=1, static_limit=0.001)
mask = find_static_regions(series, period=1, max_diff=0.001)

self.assertEqual(1, mask.sum())
self.assertTrue(1, mask.iloc[index])
Expand All @@ -82,15 +82,15 @@ def test_no_static_period(self):
freq="1h",
)

static_mask = find_static_regions(series, diff_period=1, static_limit=0.001)
static_mask = find_static_regions(series, period=1, max_diff=0.001)

pd.testing.assert_series_equal(
pd.Series(index=static_mask.index, data=False),
static_mask,
check_names=False,
)

def test_static_period_longer_than_diff(self):
def test_static_period_longer_than_period_threshold(self):
series = get_random_timeseries(
start=get_random_datetime(),
period=datetime.timedelta(days=100),
Expand All @@ -101,7 +101,7 @@ def test_static_period_longer_than_diff(self):
index_end = index_start + index_length
series.iloc[index_start:index_end] = series.iloc[index_start - 1]

static_mask = find_static_regions(series, diff_period=24, static_limit=0.001)
static_mask = find_static_regions(series, period=24, max_diff=0.001)

self.assertEqual(
index_length,
Expand All @@ -112,7 +112,7 @@ def test_static_period_longer_than_diff(self):
static_mask.iloc[index_start:index_end].sum(),
)

def test_diff_period_longer_than_static_period(self):
def test_period_threshold_longer_than_static_period(self):
series = get_random_timeseries(
start=get_random_datetime(),
period=datetime.timedelta(days=100),
Expand All @@ -123,7 +123,7 @@ def test_diff_period_longer_than_static_period(self):
index_end = index_start + index_length
series.iloc[index_start:index_end] = series.iloc[index_start - 1]

static_mask = find_static_regions(series, diff_period=31, static_limit=0.001)
static_mask = find_static_regions(series, period=31, max_diff=0.001)

self.assertEqual(0, static_mask.sum())

Expand All @@ -136,7 +136,7 @@ def test_static_period_at_the_end(self):
index_length = 14
series.iloc[-index_length:] = series.iloc[-index_length - 1]

static_mask = find_static_regions(series, diff_period=10, static_limit=0.001)
static_mask = find_static_regions(series, period=10, max_diff=0.001)

self.assertEqual(index_length, static_mask.sum())
self.assertEqual(index_length, static_mask.iloc[-index_length:].sum())

0 comments on commit bf18598

Please sign in to comment.