Skip to content

Commit

Permalink
Merge branch 'aggregated_filters_for_trials' into remove-deprecated-f…
Browse files Browse the repository at this point in the history
…unctions
  • Loading branch information
mdeceglie committed Jul 2, 2024
2 parents fc0ee0c + d1eb662 commit 616e358
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 68 deletions.
5 changes: 5 additions & 0 deletions docs/sphinx/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ Filtering
poa_filter
tcell_filter
normalized_filter
two_way_window_filter
insolation_filter
hampel_filter
directional_tukey_filter
hour_angle_filter


Normalization
Expand Down
1 change: 1 addition & 0 deletions docs/sphinx/source/changelog/pending.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ when compared with older versions of RdTools
Enhancements
------------
* Added a new wrapper function for clearsky filters (:pull:`412`)
* Improve test coverage, especially for the newly added filter capabilities (:pull:`413`)

Bug fixes
---------
Expand Down
14 changes: 8 additions & 6 deletions rdtools/analysis_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ class TrendAnalysis:
filter_params_aggregated: dict
parameters to be passed to rdtools.filtering functions that specifically handle
aggregated data (daily filters, etc). Keys are the names of the rdtools.filtering functions.
Values are dicts of parameters to be passed to those functions. Also has a special key
Values are dicts of parameters to be passed to those functions. To invoke `clearsky_filter`
for a sensor analysis, use the special key `sensor_clearsky_filter`. Also has a special key
`ad_hoc_filter`; this filter is a boolean mask joined with the rest of the filters.
filter_params_aggregated defaults to empty dicts for each function in rdtools.filtering,
in which case those functions use default parameter values, `ad_hoc_filter`
Expand Down Expand Up @@ -532,11 +533,12 @@ def _call_clearsky_filter(filter_string):
filter_components["clearsky_filter"] = _call_clearsky_filter(
"clearsky_filter"
)
# TODO: Ask Mike about this section
# if "sensor_clearsky_filter" in self.filter_params:
# filter_components["sensor_clearsky_filter"] = _call_clearsky_filter(
# "sensor_clearsky_filter"
# )

if "sensor_clearsky_filter" in self.filter_params:
filter_components["sensor_clearsky_filter"] = _call_clearsky_filter(
"sensor_clearsky_filter"
)


# note: the previous implementation using the & operator treated NaN
# filter values as False, so we do the same here for consistency:
Expand Down
87 changes: 71 additions & 16 deletions rdtools/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def pvlib_clearsky_filter(
**kwargs,
):
"""
Filtering based on the Reno and Hansen method for clearsky filtering
Filtering based on the Reno and Hansen method for clear-sky filtering
as implemented in pvlib. Requires a regular time series with uniform
time steps.
Expand Down Expand Up @@ -794,9 +794,8 @@ def _calculate_xgboost_model_features(df, sampling_frequency):

def xgboost_clip_filter(power_ac, mounting_type="fixed"):
"""
This function generates the features to run through the XGBoost
clipping model, runs the data through the model, and generates
model outputs.
This filter uses an XGBoost model to filter out
clipping periods in AC power or energy time series.
Parameters
----------
Expand Down Expand Up @@ -948,7 +947,7 @@ def two_way_window_filter(
series, roll_period=pd.to_timedelta("7 Days"), outlier_threshold=0.03
):
"""
Removes outliers based on forward and backward window of the rolling median. Points beyond
Removes anomalies based on forward and backward window of the rolling median. Points beyond
outlier_threshold from both the forward and backward-looking median are excluded by the filter.
Parameters
Expand All @@ -959,6 +958,11 @@ def two_way_window_filter(
The window to use for backward and forward
rolling medians for detecting outliers.
outlier_threshold : default is 0.03 meaning 3%
Returns
-------
pandas.Series
Boolean Series excluding anomalies
"""

series = series / series.quantile(0.99)
Expand All @@ -984,23 +988,35 @@ def two_way_window_filter(

def insolation_filter(insolation, quantile=0.1):
    """
    A simple quantile filter. Primary application in RdTools is to exclude
    low insolation points after the aggregation step.

    Parameters
    ----------
    insolation : pandas.Series
        Pandas time series to be filtered. Usually insolation.
    quantile : float, default 0.1
        The minimum quantile above which data is kept.

    Returns
    -------
    pandas.Series
        Boolean Series excluding points below the quantile threshold.
    """
    # Points at or above the quantile threshold are kept (True).
    limit = insolation.quantile(quantile)
    mask = insolation >= limit
    return mask


def hampel_filter(series, k="14d", t0=3):
    """
    Hampel outlier filter primarily applied after aggregation step, but broadly
    applicable.

    Points are compared against a centered rolling median; points farther than
    t0 scaled median-absolute-deviations (MADs) from that median are flagged
    as outliers.

    Parameters
    ----------
    series : pandas.Series
        daily normalized time series
    k : int or time offset string e.g. 'd', default '14d'
        size of window including the sample; '14d' is equal to 7 days on
        either side of the sample
    t0 : float, default 3
        threshold in number of (scaled) MADs from the rolling median beyond
        which a point is classified as an outlier

    Returns
    -------
    pandas.Series
        Boolean Series of whether the given measurement is within t0 sigma of
        the rolling median. False points indicate outliers to be excluded.
    """
    # 1.4826 scales the MAD so it is a consistent estimator of the standard
    # deviation for normally distributed data.
    L = 1.4826
    rolling_median = series.rolling(k, center=True, min_periods=1).median()
    difference = np.abs(rolling_median - series)
    median_abs_deviation = difference.rolling(k, center=True, min_periods=1).median()
    threshold = t0 * L * median_abs_deviation
    return difference <= threshold
Expand All @@ -1034,9 +1050,28 @@ def _tukey_fence(series, k=1.5):

def directional_tukey_filter(series, roll_period=pd.to_timedelta("7 Days"), k=1.5):
"""
Performs a forward and backward looking rolling tukey filter. Points must only
pass one of either the forward or backward looking filters to be kept
    Performs a forward and backward looking rolling Tukey filter. Points more than k*IQR
    above the third quartile or below the first quartile are classified as outliers. Points
    must only pass one of either the forward or backward looking filters to be kept.
Parameters
----------
series: pandas.Series
Pandas time series to be filtered.
roll_period : int or timedelta, default 7 days
The window to use for backward and forward
rolling medians for detecting outliers.
k : float
The Tukey parameter. Points more than k*IQR above the third quartile
or below the first quartile are classified as outliers.
Returns
-------
pandas.Series
Boolean Series excluding anomalies
"""

backward_median = series.rolling(roll_period, min_periods=5, closed="both").median()
forward_median = (
series.loc[::-1].rolling(roll_period, min_periods=5, closed="both").median()
Expand All @@ -1056,6 +1091,26 @@ def directional_tukey_filter(series, roll_period=pd.to_timedelta("7 Days"), k=1.
def hour_angle_filter(series, lat, lon, min_hour_angle=-30, max_hour_angle=30):
"""
Creates a filter based on the hour angle of the sun (15 degrees per hour)
Parameters
----------
series: pandas.Series
Pandas time series to be filtered
lat: float
location latitude
lon: float
location longitude
min_hour_angle: float
minimum hour angle to include
max_hour_angle: float
maximum hour angle to include
Returns
-------
pandas.Series
Boolean Series excluding points outside the specified hour
angle range
"""

times = series.index
Expand Down
19 changes: 19 additions & 0 deletions rdtools/test/analysis_chains_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,16 @@ def clearsky_optional(cs_input, clearsky_analysis):
return extras


@pytest.fixture
def sensor_clearsky_analysis(cs_input, clearsky_parameters):
    """TrendAnalysis fixture that runs a sensor analysis with only the
    sensor_clearsky_filter enabled."""
    analysis = TrendAnalysis(**clearsky_parameters)
    analysis.set_clearsky(**cs_input)
    # Disable all index-based filters; enable only the clearsky filter that
    # is applied during sensor analysis.
    analysis.filter_params = {"sensor_clearsky_filter": {"model": "csi"}}
    analysis.sensor_analysis(analyses=["yoy_degradation"])
    return analysis


def test_clearsky_analysis(clearsky_analysis):
yoy_results = clearsky_analysis.results["clearsky"]["yoy_degradation"]
ci = yoy_results["rd_confidence_interval"]
Expand All @@ -490,6 +500,15 @@ def test_clearsky_analysis_optional(
assert [-4.71, -4.69] == pytest.approx(ci, abs=1e-2)


def test_sensor_clearsky_analysis(sensor_clearsky_analysis):
    """Check degradation results of a sensor analysis run with
    sensor_clearsky_filter enabled."""
    yoy_results = sensor_clearsky_analysis.results["sensor"]["yoy_degradation"]
    ci = yoy_results["rd_confidence_interval"]
    rd = yoy_results["p50_rd"]
    # Regression values established when sensor_clearsky_filter was added.
    assert -5.18 == pytest.approx(rd, abs=1e-2)
    assert [-5.18, -5.18] == pytest.approx(ci, abs=1e-2)


@pytest.fixture
def clearsky_analysis_exp_power(clearsky_parameters, clearsky_optional):
power_expected = normalization.pvwatts_dc_power(
Expand Down
13 changes: 6 additions & 7 deletions rdtools/test/bootstrap_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@ def test_bootstrap_module(
):

if decomposition_type == "error":
pytest.raises(
ValueError,
_make_time_series_bootstrap_samples,
cods_normalized_daily,
cods_normalized_daily_wo_noise,
decomposition_type=decomposition_type,
)
with pytest.raises(ValueError):
_make_time_series_bootstrap_samples(
cods_normalized_daily,
cods_normalized_daily_wo_noise,
decomposition_type=decomposition_type)

else:
        # Test make time series bootstrap samples and construction of confidence intervals.
# Test make bootstrap samples
Expand Down
8 changes: 3 additions & 5 deletions rdtools/test/energy_from_power_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ def test_energy_from_power_leading_nans():

def test_energy_from_power_series_index():
    """energy_from_power should reject a series without a DatetimeIndex."""
    power = pd.Series([1, 2, 3, 4, 5])
    with pytest.raises(ValueError):
        energy_from_power(power)

54 changes: 27 additions & 27 deletions rdtools/test/filtering_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def test_clearsky_filter(mocker):

# Check that a ValueError is thrown when a model is passed that
# is not in the acceptable list.
pytest.raises(ValueError, clearsky_filter,
measured_poa,
clearsky_poa,
model='invalid')
with pytest.raises(ValueError):
clearsky_filter(measured_poa,
clearsky_poa,
model='invalid')

# Check that the csi_filter function is called
mock_csi_filter = mocker.patch('rdtools.filtering.csi_filter')
Expand Down Expand Up @@ -79,7 +79,7 @@ def test_pvlib_clearsky_filter(lookup_parameters):
lookup_parameters=lookup_parameters)

# Expect clearsky index is filtered.
expected_result = expected_result = poa_global_measured > 500
expected_result = poa_global_measured > 500
pd.testing.assert_series_equal(filtered, expected_result)


Expand Down Expand Up @@ -200,16 +200,16 @@ def test_logic_clip_filter(generate_power_time_series_no_clipping,
generate_power_time_series_no_clipping
# Test that a Type Error is raised when a pandas series
# without a datetime index is used.
pytest.raises(TypeError, logic_clip_filter,
power_no_datetime_index_nc)
with pytest.raises(TypeError):
logic_clip_filter(power_no_datetime_index_nc)
# Test that an error is thrown when we don't include the correct
# mounting configuration input
pytest.raises(ValueError, logic_clip_filter,
power_datetime_index_nc, 'not_fixed')
with pytest.raises(ValueError):
logic_clip_filter(power_datetime_index_nc, 'not_fixed')
# Test that an error is thrown when there are 10 or fewer readings
# in the time series
pytest.raises(Exception, logic_clip_filter,
power_datetime_index_nc[:9])
with pytest.raises(Exception):
logic_clip_filter(power_datetime_index_nc[:9])
# Test that a warning is thrown when the time series is tz-naive
warnings.simplefilter("always")
with warnings.catch_warnings(record=True) as record:
Expand All @@ -220,8 +220,8 @@ def test_logic_clip_filter(generate_power_time_series_no_clipping,
# Scramble the index and run through the filter. This should throw
# an IndexError.
power_datetime_index_nc_shuffled = power_datetime_index_nc.sample(frac=1)
pytest.raises(IndexError, logic_clip_filter,
power_datetime_index_nc_shuffled, 'fixed')
with pytest.raises(IndexError):
logic_clip_filter(power_datetime_index_nc_shuffled, 'fixed')
# Generate 1-minute interval data, run it through the function, and
# check that the associated data returned is 1-minute
power_datetime_index_one_min_intervals = \
Expand Down Expand Up @@ -270,16 +270,16 @@ def test_xgboost_clip_filter(generate_power_time_series_no_clipping,
generate_power_time_series_no_clipping
# Test that a Type Error is raised when a pandas series
# without a datetime index is used.
pytest.raises(TypeError, xgboost_clip_filter,
power_no_datetime_index_nc)
with pytest.raises(TypeError):
xgboost_clip_filter(power_no_datetime_index_nc)
# Test that an error is thrown when we don't include the correct
# mounting configuration input
pytest.raises(ValueError, xgboost_clip_filter,
power_datetime_index_nc, 'not_fixed')
with pytest.raises(ValueError):
xgboost_clip_filter(power_datetime_index_nc, 'not_fixed')
# Test that an error is thrown when there are 10 or fewer readings
# in the time series
pytest.raises(Exception, xgboost_clip_filter,
power_datetime_index_nc[:9])
with pytest.raises(Exception):
xgboost_clip_filter(power_datetime_index_nc[:9])
# Test that a warning is thrown when the time series is tz-naive
warnings.simplefilter("always")
with warnings.catch_warnings(record=True) as record:
Expand All @@ -291,8 +291,8 @@ def test_xgboost_clip_filter(generate_power_time_series_no_clipping,
# Scramble the index and run through the filter. This should throw
# an IndexError.
power_datetime_index_nc_shuffled = power_datetime_index_nc.sample(frac=1)
pytest.raises(IndexError, xgboost_clip_filter,
power_datetime_index_nc_shuffled, 'fixed')
with pytest.raises(IndexError):
xgboost_clip_filter(power_datetime_index_nc_shuffled, 'fixed')
# Generate 1-minute interval data, run it through the function, and
# check that the associated data returned is 1-minute
power_datetime_index_one_min_intervals = \
Expand Down Expand Up @@ -342,9 +342,8 @@ def test_clip_filter(generate_power_time_series_no_clipping):

# Check that a ValueError is thrown when a model is passed that
# is not in the acceptable list.
pytest.raises(ValueError, clip_filter,
power_datetime_index_nc,
'random_forest')
with pytest.raises(ValueError):
clip_filter(power_datetime_index_nc, 'random_forest')
# Check that the wrapper handles the xgboost clipping
# function with kwargs.
filtered_xgboost = clip_filter(power_datetime_index_nc,
Expand All @@ -358,9 +357,10 @@ def test_clip_filter(generate_power_time_series_no_clipping):
rolling_range_max_cutoff=0.3)
    # Check that the function raises a TypeError if a wrong keyword
# arg is passed in the kwarg arguments.
pytest.raises(TypeError, clip_filter, power_datetime_index_nc,
'xgboost',
rolling_range_max_cutoff=0.3)
with pytest.raises(TypeError):
clip_filter(power_datetime_index_nc,
'xgboost',
rolling_range_max_cutoff=0.3)
assert bool((expected_result_quantile == filtered_quantile)
.all(axis=None))
assert bool(filtered_xgboost.all(axis=None))
Expand Down
Loading

0 comments on commit 616e358

Please sign in to comment.