Skip to content

Commit

Permalink
Merge branch 'aggregated_filters_for_trials' into remove-deprecated-f…
Browse files Browse the repository at this point in the history
…unctions
  • Loading branch information
mdeceglie committed Jul 2, 2024
2 parents fc0ee0c + d1eb662 commit 616e358
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 68 deletions.
5 changes: 5 additions & 0 deletions docs/sphinx/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ Filtering
poa_filter
tcell_filter
normalized_filter
two_way_window_filter
insolation_filter
hampel_filter
directional_tukey_filter
hour_angle_filter


Normalization
Expand Down
1 change: 1 addition & 0 deletions docs/sphinx/source/changelog/pending.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ when compared with older versions of RdTools
Enhancements
------------
* Added a new wrapper function for clearsky filters (:pull:`412`)
* Improve test coverage, especially for the newly added filter capabilities (:pull:`413`)

Bug fixes
---------
Expand Down
14 changes: 8 additions & 6 deletions rdtools/analysis_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ class TrendAnalysis:
filter_params_aggregated: dict
parameters to be passed to rdtools.filtering functions that specifically handle
aggregated data (daily filters, etc). Keys are the names of the rdtools.filtering functions.
Values are dicts of parameters to be passed to those functions. Also has a special key
Values are dicts of parameters to be passed to those functions. To invoke `clearsky_filter`
for a sensor analysis, use the special key `sensor_clearsky_filter`. Also has a special key
`ad_hoc_filter`; this filter is a boolean mask joined with the rest of the filters.
filter_params_aggregated defaults to empty dicts for each function in rdtools.filtering,
in which case those functions use default parameter values, `ad_hoc_filter`
Expand Down Expand Up @@ -532,11 +533,12 @@ def _call_clearsky_filter(filter_string):
filter_components["clearsky_filter"] = _call_clearsky_filter(
"clearsky_filter"
)
# TODO: Ask Mike about this section
# if "sensor_clearsky_filter" in self.filter_params:
# filter_components["sensor_clearsky_filter"] = _call_clearsky_filter(
# "sensor_clearsky_filter"
# )

if "sensor_clearsky_filter" in self.filter_params:
filter_components["sensor_clearsky_filter"] = _call_clearsky_filter(
"sensor_clearsky_filter"
)


# note: the previous implementation using the & operator treated NaN
# filter values as False, so we do the same here for consistency:
Expand Down
87 changes: 71 additions & 16 deletions rdtools/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def pvlib_clearsky_filter(
**kwargs,
):
"""
Filtering based on the Reno and Hansen method for clearsky filtering
Filtering based on the Reno and Hansen method for clear-sky filtering
as implemented in pvlib. Requires a regular time series with uniform
time steps.
Expand Down Expand Up @@ -794,9 +794,8 @@ def _calculate_xgboost_model_features(df, sampling_frequency):

def xgboost_clip_filter(power_ac, mounting_type="fixed"):
"""
This function generates the features to run through the XGBoost
clipping model, runs the data through the model, and generates
model outputs.
This filter uses an XGBoost model to filter out
clipping periods in AC power or energy time series.
Parameters
----------
Expand Down Expand Up @@ -948,7 +947,7 @@ def two_way_window_filter(
series, roll_period=pd.to_timedelta("7 Days"), outlier_threshold=0.03
):
"""
Removes outliers based on forward and backward window of the rolling median. Points beyond
Removes anomalies based on forward and backward window of the rolling median. Points beyond
outlier_threshold from both the forward and backward-looking median are excluded by the filter.
Parameters
Expand All @@ -959,6 +958,11 @@ def two_way_window_filter(
The window to use for backward and forward
rolling medians for detecting outliers.
outlier_threshold : default is 0.03 meaning 3%
Returns
-------
pandas.Series
Boolean Series excluding anomalies
"""

series = series / series.quantile(0.99)
Expand All @@ -984,23 +988,35 @@ def two_way_window_filter(

def insolation_filter(insolation, quantile=0.1):
    """
    A simple quantile filter. Primary application in RdTools is to exclude
    low insolation points after the aggregation step.

    Parameters
    ----------
    insolation : pandas.Series
        Pandas time series to be filtered. Usually insolation.
    quantile : float, default 0.1
        The minimum quantile above which data is kept.

    Returns
    -------
    pandas.Series
        Boolean Series excluding points below the quantile threshold.
    """
    # Points at or above the quantile threshold are kept (True).
    limit = insolation.quantile(quantile)
    mask = insolation >= limit
    return mask


def hampel_filter(series, k="14d", t0=3):
    """
    Hampel outlier filter primarily applied after aggregation step, but broadly
    applicable.

    Points are compared against a centered rolling median; points farther than
    t0 scaled median-absolute-deviations (MADs) from that median are flagged
    as outliers.

    Parameters
    ----------
    series : pandas.Series
        daily normalized time series
    k : int or time offset string e.g. 'd', default '14d'
        size of window including the sample; '14d' is equal to 7 days on
        either side of the sample
    t0 : float, default 3
        threshold in number of (scaled) MADs from the rolling median beyond
        which a point is classified as an outlier

    Returns
    -------
    pandas.Series
        Boolean Series of whether the given measurement is within t0 sigma of
        the rolling median. False points indicate outliers to be excluded.
    """
    # 1.4826 scales the MAD so it is a consistent estimator of the standard
    # deviation for normally distributed data.
    L = 1.4826
    rolling_median = series.rolling(k, center=True, min_periods=1).median()
    difference = np.abs(rolling_median - series)
    median_abs_deviation = difference.rolling(k, center=True, min_periods=1).median()
    threshold = t0 * L * median_abs_deviation
    return difference <= threshold
Expand All @@ -1034,9 +1050,28 @@ def _tukey_fence(series, k=1.5):

def directional_tukey_filter(series, roll_period=pd.to_timedelta("7 Days"), k=1.5):
"""
Performs a forward and backward looking rolling tukey filter. Points must only
pass one of either the forward or backward looking filters to be kept
    Performs a forward and backward looking rolling Tukey filter. Points more than k*IQR
    above the third quartile or below the first quartile are classified as outliers. Points
    must only pass one of either the forward or backward looking filters to be kept.
Parameters
----------
series: pandas.Series
Pandas time series to be filtered.
roll_period : int or timedelta, default 7 days
The window to use for backward and forward
rolling medians for detecting outliers.
k : float
The Tukey parameter. Points more than k*IQR above the third quartile
or below the first quartile are classified as outliers.
Returns
-------
pandas.Series
Boolean Series excluding anomalies
"""

backward_median = series.rolling(roll_period, min_periods=5, closed="both").median()
forward_median = (
series.loc[::-1].rolling(roll_period, min_periods=5, closed="both").median()
Expand All @@ -1056,6 +1091,26 @@ def directional_tukey_filter(series, roll_period=pd.to_timedelta("7 Days"), k=1.
def hour_angle_filter(series, lat, lon, min_hour_angle=-30, max_hour_angle=30):
"""
Creates a filter based on the hour angle of the sun (15 degrees per hour)
Parameters
----------
series: pandas.Series
Pandas time series to be filtered
lat: float
location latitude
lon: float
location longitude
min_hour_angle: float
minimum hour angle to include
max_hour_angle: float
maximum hour angle to include
Returns
-------
pandas.Series
Boolean Series excluding points outside the specified hour
angle range
"""

times = series.index
Expand Down
19 changes: 19 additions & 0 deletions rdtools/test/analysis_chains_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,16 @@ def clearsky_optional(cs_input, clearsky_analysis):
return extras


@pytest.fixture
def sensor_clearsky_analysis(cs_input, clearsky_parameters):
    """TrendAnalysis fixture that runs a sensor analysis with only the
    sensor_clearsky_filter enabled."""
    analysis = TrendAnalysis(**clearsky_parameters)
    analysis.set_clearsky(**cs_input)
    # Disable all index-based filters; enable only the clearsky filter that
    # is applied during sensor analysis.
    analysis.filter_params = {"sensor_clearsky_filter": {"model": "csi"}}
    analysis.sensor_analysis(analyses=["yoy_degradation"])
    return analysis


def test_clearsky_analysis(clearsky_analysis):
yoy_results = clearsky_analysis.results["clearsky"]["yoy_degradation"]
ci = yoy_results["rd_confidence_interval"]
Expand All @@ -490,6 +500,15 @@ def test_clearsky_analysis_optional(
assert [-4.71, -4.69] == pytest.approx(ci, abs=1e-2)


def test_sensor_clearsky_analysis(sensor_clearsky_analysis):
    """Check degradation results of a sensor analysis run with
    sensor_clearsky_filter enabled."""
    yoy_results = sensor_clearsky_analysis.results["sensor"]["yoy_degradation"]
    ci = yoy_results["rd_confidence_interval"]
    rd = yoy_results["p50_rd"]
    # Regression values established when sensor_clearsky_filter was added.
    assert -5.18 == pytest.approx(rd, abs=1e-2)
    assert [-5.18, -5.18] == pytest.approx(ci, abs=1e-2)


@pytest.fixture
def clearsky_analysis_exp_power(clearsky_parameters, clearsky_optional):
power_expected = normalization.pvwatts_dc_power(
Expand Down
13 changes: 6 additions & 7 deletions rdtools/test/bootstrap_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@ def test_bootstrap_module(
):

if decomposition_type == "error":
pytest.raises(
ValueError,
_make_time_series_bootstrap_samples,
cods_normalized_daily,
cods_normalized_daily_wo_noise,
decomposition_type=decomposition_type,
)
with pytest.raises(ValueError):
_make_time_series_bootstrap_samples(
cods_normalized_daily,
cods_normalized_daily_wo_noise,
decomposition_type=decomposition_type)

else:
        # Test make time series bootstrap samples and construction of confidence intervals.
# Test make bootstrap samples
Expand Down
8 changes: 3 additions & 5 deletions rdtools/test/energy_from_power_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ def test_energy_from_power_leading_nans():

def test_energy_from_power_series_index():
    """energy_from_power should reject a series without a DatetimeIndex."""
    power = pd.Series([1, 2, 3, 4, 5])
    with pytest.raises(ValueError):
        energy_from_power(power)

54 changes: 27 additions & 27 deletions rdtools/test/filtering_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def test_clearsky_filter(mocker):

# Check that a ValueError is thrown when a model is passed that
# is not in the acceptable list.
pytest.raises(ValueError, clearsky_filter,
measured_poa,
clearsky_poa,
model='invalid')
with pytest.raises(ValueError):
clearsky_filter(measured_poa,
clearsky_poa,
model='invalid')

# Check that the csi_filter function is called
mock_csi_filter = mocker.patch('rdtools.filtering.csi_filter')
Expand Down Expand Up @@ -79,7 +79,7 @@ def test_pvlib_clearsky_filter(lookup_parameters):
lookup_parameters=lookup_parameters)

# Expect clearsky index is filtered.
expected_result = expected_result = poa_global_measured > 500
expected_result = poa_global_measured > 500
pd.testing.assert_series_equal(filtered, expected_result)


Expand Down Expand Up @@ -200,16 +200,16 @@ def test_logic_clip_filter(generate_power_time_series_no_clipping,
generate_power_time_series_no_clipping
# Test that a Type Error is raised when a pandas series
# without a datetime index is used.
pytest.raises(TypeError, logic_clip_filter,
power_no_datetime_index_nc)
with pytest.raises(TypeError):
logic_clip_filter(power_no_datetime_index_nc)
# Test that an error is thrown when we don't include the correct
# mounting configuration input
pytest.raises(ValueError, logic_clip_filter,
power_datetime_index_nc, 'not_fixed')
with pytest.raises(ValueError):
logic_clip_filter(power_datetime_index_nc, 'not_fixed')
# Test that an error is thrown when there are 10 or fewer readings
# in the time series
pytest.raises(Exception, logic_clip_filter,
power_datetime_index_nc[:9])
with pytest.raises(Exception):
logic_clip_filter(power_datetime_index_nc[:9])
# Test that a warning is thrown when the time series is tz-naive
warnings.simplefilter("always")
with warnings.catch_warnings(record=True) as record:
Expand All @@ -220,8 +220,8 @@ def test_logic_clip_filter(generate_power_time_series_no_clipping,
# Scramble the index and run through the filter. This should throw
# an IndexError.
power_datetime_index_nc_shuffled = power_datetime_index_nc.sample(frac=1)
pytest.raises(IndexError, logic_clip_filter,
power_datetime_index_nc_shuffled, 'fixed')
with pytest.raises(IndexError):
logic_clip_filter(power_datetime_index_nc_shuffled, 'fixed')
# Generate 1-minute interval data, run it through the function, and
# check that the associated data returned is 1-minute
power_datetime_index_one_min_intervals = \
Expand Down Expand Up @@ -270,16 +270,16 @@ def test_xgboost_clip_filter(generate_power_time_series_no_clipping,
generate_power_time_series_no_clipping
# Test that a Type Error is raised when a pandas series
# without a datetime index is used.
pytest.raises(TypeError, xgboost_clip_filter,
power_no_datetime_index_nc)
with pytest.raises(TypeError):
xgboost_clip_filter(power_no_datetime_index_nc)
# Test that an error is thrown when we don't include the correct
# mounting configuration input
pytest.raises(ValueError, xgboost_clip_filter,
power_datetime_index_nc, 'not_fixed')
with pytest.raises(ValueError):
xgboost_clip_filter(power_datetime_index_nc, 'not_fixed')
# Test that an error is thrown when there are 10 or fewer readings
# in the time series
pytest.raises(Exception, xgboost_clip_filter,
power_datetime_index_nc[:9])
with pytest.raises(Exception):
xgboost_clip_filter(power_datetime_index_nc[:9])
# Test that a warning is thrown when the time series is tz-naive
warnings.simplefilter("always")
with warnings.catch_warnings(record=True) as record:
Expand All @@ -291,8 +291,8 @@ def test_xgboost_clip_filter(generate_power_time_series_no_clipping,
# Scramble the index and run through the filter. This should throw
# an IndexError.
power_datetime_index_nc_shuffled = power_datetime_index_nc.sample(frac=1)
pytest.raises(IndexError, xgboost_clip_filter,
power_datetime_index_nc_shuffled, 'fixed')
with pytest.raises(IndexError):
xgboost_clip_filter(power_datetime_index_nc_shuffled, 'fixed')
# Generate 1-minute interval data, run it through the function, and
# check that the associated data returned is 1-minute
power_datetime_index_one_min_intervals = \
Expand Down Expand Up @@ -342,9 +342,8 @@ def test_clip_filter(generate_power_time_series_no_clipping):

# Check that a ValueError is thrown when a model is passed that
# is not in the acceptable list.
pytest.raises(ValueError, clip_filter,
power_datetime_index_nc,
'random_forest')
with pytest.raises(ValueError):
clip_filter(power_datetime_index_nc, 'random_forest')
# Check that the wrapper handles the xgboost clipping
# function with kwargs.
filtered_xgboost = clip_filter(power_datetime_index_nc,
Expand All @@ -358,9 +357,10 @@ def test_clip_filter(generate_power_time_series_no_clipping):
rolling_range_max_cutoff=0.3)
    # Check that the function raises a TypeError if a wrong keyword
# arg is passed in the kwarg arguments.
pytest.raises(TypeError, clip_filter, power_datetime_index_nc,
'xgboost',
rolling_range_max_cutoff=0.3)
with pytest.raises(TypeError):
clip_filter(power_datetime_index_nc,
'xgboost',
rolling_range_max_cutoff=0.3)
assert bool((expected_result_quantile == filtered_quantile)
.all(axis=None))
assert bool(filtered_xgboost.all(axis=None))
Expand Down
Loading

0 comments on commit 616e358

Please sign in to comment.