diff --git a/docs/sphinx/source/changelog.rst b/docs/sphinx/source/changelog.rst
index 4260e3db..646cf832 100644
--- a/docs/sphinx/source/changelog.rst
+++ b/docs/sphinx/source/changelog.rst
@@ -1,6 +1,6 @@
 RdTools Change Log
 ==================
-.. include:: changelog/pending.rst
+.. include:: changelog/v2.2.0-beta.1.rst
 .. include:: changelog/v2.1.4.rst
 .. include:: changelog/v2.2.0-beta.0.rst
 .. include:: changelog/v2.1.3.rst
diff --git a/docs/sphinx/source/changelog/v2.2.0-beta.1.rst b/docs/sphinx/source/changelog/v2.2.0-beta.1.rst
new file mode 100644
index 00000000..db65e0e6
--- /dev/null
+++ b/docs/sphinx/source/changelog/v2.2.0-beta.1.rst
@@ -0,0 +1,7 @@
+********************************
+v2.2.0-beta.1 (December 7, 2022)
+********************************
+
+Enhancements
+------------
+* Added framework for running aggregated filters in ``analysis_chains.py`` (:pull:`348`)
diff --git a/rdtools/analysis_chains.py b/rdtools/analysis_chains.py
index 2fbe2db0..e072209e 100644
--- a/rdtools/analysis_chains.py
+++ b/rdtools/analysis_chains.py
@@ -75,6 +75,14 @@ class TrendAnalysis():
         filter_params defaults to empty dicts for each function in rdtools.filtering,
         in which case those functions use default parameter values, `ad_hoc_filter`
         defaults to None. See examples for more information.
+    filter_params_aggregated : dict
+        parameters to be passed to rdtools.filtering functions that specifically handle
+        aggregated data (daily filters, etc.). Keys are the names of the rdtools.filtering functions.
+        Values are dicts of parameters to be passed to those functions. Also has a special key
+        `ad_hoc_filter`; this filter is a boolean mask joined with the rest of the filters.
+        filter_params_aggregated defaults to empty dicts for each function in rdtools.filtering,
+        in which case those functions use default parameter values, `ad_hoc_filter`
+        defaults to None. See examples for more information.
     results : dict
         Nested dict used to store the results of methods ending with `_analysis`
     '''
@@ -133,6 +141,9 @@ def __init__(self, pv, poa_global=None, temperature_cell=None, temperature_ambie
             'csi_filter': {},
             'ad_hoc_filter': None  # use this to include an explict filter
         }
+        self.filter_params_aggregated = {
+            'ad_hoc_filter': None
+        }
         # remove tcell_filter from list if power_expected is passed in
         if power_expected is not None and temperature_cell is None:
             del self.filter_params['tcell_filter']
@@ -252,7 +263,8 @@ def _calc_clearsky_poa(self, times=None, rescale=True, **kwargs):
             clearsky_poa = clearsky_poa['poa_global']

         if aggregate:
-            interval_id = pd.Series(range(len(self.poa_global)), index=self.poa_global.index)
+            interval_id = pd.Series(
+                range(len(self.poa_global)), index=self.poa_global.index)
             interval_id = interval_id.reindex(times, method='backfill')
             clearsky_poa = clearsky_poa.groupby(interval_id).mean()
             clearsky_poa.index = self.poa_global.index
@@ -383,7 +395,8 @@ def _filter(self, energy_normalized, case):
         self.filter_params, which is a dict, the keys of which are names of
         functions in rdtools.filtering, and the values of which are dicts
         containing the associated parameters with which to run the filtering
-        functions. See examples for details on how to modify filter parameters.
+        functions. This private method is specifically for the original indexed
+        data. See examples for details on how to modify filter parameters.

         Parameters
         ----------
@@ -405,7 +418,8 @@ def _filter(self, energy_normalized, case):
         # at once. However, we add a default value of True, with the same index as
         # energy_normalized, so that the output is still correct even when all
         # filters have been disabled.
-        filter_components = {'default': pd.Series(True, index=energy_normalized.index)}
+        filter_components = {'default': pd.Series(
+            True, index=energy_normalized.index)}

         if case == 'sensor':
             poa = self.poa_global
@@ -455,14 +469,16 @@ def _filter(self, energy_normalized, case):
             ad_hoc_filter = self.filter_params['ad_hoc_filter']

             if ad_hoc_filter.isnull().any():
-                warnings.warn('ad_hoc_filter contains NaN values; setting to False (excluding)')
+                warnings.warn(
+                    'ad_hoc_filter contains NaN values; setting to False (excluding)')
                 ad_hoc_filter = ad_hoc_filter.fillna(False)

             if not filter_components.index.equals(ad_hoc_filter.index):
                 warnings.warn('ad_hoc_filter index does not match index of other filters; missing '
                               'values will be set to True (kept). Align the index with the index '
                               'of the filter_components attribute to prevent this warning')
-                ad_hoc_filter = ad_hoc_filter.reindex(filter_components.index).fillna(True)
+                ad_hoc_filter = ad_hoc_filter.reindex(
+                    filter_components.index).fillna(True)

             filter_components['ad_hoc_filter'] = ad_hoc_filter

@@ -475,6 +491,63 @@ def _filter(self, energy_normalized, case):
             self.clearsky_filter = bool_filter
             self.clearsky_filter_components = filter_components

+    def _aggregated_filter(self, aggregated, case):
+        """
+        Mirrors the _filter private function, but with aggregated filters applied.
+        These aggregated filters are based on those in rdtools.filtering. Uses
+        self.filter_params_aggregated, which is a dict, the keys of which are names of
+        functions in rdtools.filtering, and the values of which are dicts
+        containing the associated parameters with which to run the filtering
+        functions. See examples for details on how to modify filter parameters.
+
+        Parameters
+        ----------
+        aggregated : pandas.Series
+            Time series of aggregated normalized AC energy
+        case : str
+            'sensor' or 'clearsky' which filtering protocol to apply. Affects
+            whether result is stored in self.sensor_filter_aggregated or
+            self.clearsky_filter_aggregated)
+
+        Returns
+        -------
+        None
+        """
+        filter_components_aggregated = {'default':
+                                        pd.Series(True, index=aggregated.index)}
+        # Add daily aggregate filters as they come online here.
+        # Convert the dictionary into a dataframe (after running filters)
+        filter_components_aggregated = pd.DataFrame(
+            filter_components_aggregated).fillna(False)
+        # Run the ad-hoc filter from filter_params_aggregated, if available
+        if self.filter_params_aggregated.get('ad_hoc_filter', None) is not None:
+            ad_hoc_filter_aggregated = self.filter_params_aggregated['ad_hoc_filter']
+
+            if ad_hoc_filter_aggregated.isnull().any():
+                warnings.warn(
+                    'aggregated ad_hoc_filter contains NaN values; setting to False (excluding)')
+                ad_hoc_filter_aggregated = ad_hoc_filter_aggregated.fillna(False)
+
+            if not filter_components_aggregated.index.equals(ad_hoc_filter_aggregated.index):
+                warnings.warn('Aggregated ad_hoc_filter index does not match index of other '
+                              'filters; missing values will be set to True (kept). '
+                              'Align the index with the index of the '
+                              'filter_components_aggregated attribute to prevent this warning')
+                ad_hoc_filter_aggregated = ad_hoc_filter_aggregated.reindex(
+                    filter_components_aggregated.index).fillna(True)
+
+            filter_components_aggregated['ad_hoc_filter'] = ad_hoc_filter_aggregated
+
+        bool_filter_aggregated = filter_components_aggregated.all(axis=1)
+        filter_components_aggregated = filter_components_aggregated.drop(
+            columns=['default'])
+        if case == 'sensor':
+            self.sensor_filter_aggregated = bool_filter_aggregated
+            self.sensor_filter_components_aggregated = filter_components_aggregated
+        elif case == 'clearsky':
+            self.clearsky_filter_aggregated = bool_filter_aggregated
+            self.clearsky_filter_components_aggregated = filter_components_aggregated
+
     def _filter_check(self, post_filter):
         '''
         post-filter check for requisite 730 days of data
@@ -621,8 +694,16 @@ def _sensor_preprocess(self):
         self._filter(energy_normalized, 'sensor')
         aggregated, aggregated_insolation = self._aggregate(
             energy_normalized[self.sensor_filter], insolation[self.sensor_filter])
-        self.sensor_aggregated_performance = aggregated
-        self.sensor_aggregated_insolation = aggregated_insolation
+        # Run daily filters on aggregated data
+        self._aggregated_filter(aggregated, 'sensor')
+        # Apply filter to aggregated data and store
+        self.sensor_aggregated_performance = aggregated[self.sensor_filter_aggregated]
+        self.sensor_aggregated_insolation = aggregated_insolation[self.sensor_filter_aggregated]
+        # Reindex the data after the fact, so it's on the aggregated interval
+        self.sensor_aggregated_performance = self.sensor_aggregated_performance.asfreq(
+            self.aggregation_freq)
+        self.sensor_aggregated_insolation = self.sensor_aggregated_insolation.asfreq(
+            self.aggregation_freq)

     def _clearsky_preprocess(self):
         '''
@@ -651,8 +732,17 @@ def _clearsky_preprocess(self):
         self._filter(cs_normalized, 'clearsky')
         cs_aggregated, cs_aggregated_insolation = self._aggregate(
             cs_normalized[self.clearsky_filter], cs_insolation[self.clearsky_filter])
-        self.clearsky_aggregated_performance = cs_aggregated
-        self.clearsky_aggregated_insolation = cs_aggregated_insolation
+        # Run daily filters on aggregated data
+        self._aggregated_filter(cs_aggregated, 'clearsky')
+        # Apply daily filter to aggregated data and store
+        self.clearsky_aggregated_performance = cs_aggregated[self.clearsky_filter_aggregated]
+        self.clearsky_aggregated_insolation = \
+            cs_aggregated_insolation[self.clearsky_filter_aggregated]
+        # Reindex the data after the fact, so it's on the aggregated interval
+        self.clearsky_aggregated_performance = self.clearsky_aggregated_performance.asfreq(
+            self.aggregation_freq)
+        self.clearsky_aggregated_insolation = self.clearsky_aggregated_insolation.asfreq(
+            self.aggregation_freq)

     def sensor_analysis(self, analyses=['yoy_degradation'], yoy_kwargs={}, srr_kwargs={}):
         '''
diff --git a/rdtools/test/analysis_chains_test.py b/rdtools/test/analysis_chains_test.py
index d22d8d98..d4903954 100644
--- a/rdtools/test/analysis_chains_test.py
+++ b/rdtools/test/analysis_chains_test.py
@@ -179,12 +179,39 @@ def test_sensor_analysis_ad_hoc_filter(sensor_parameters):
     rd_analysis.sensor_analysis(analyses=['yoy_degradation'])


+def test_sensor_analysis_aggregated_ad_hoc_filter(sensor_parameters):
+    # by excluding all but a few points, we should trigger the <2yr error
+    filt = pd.Series(False,
+                     index=sensor_parameters['pv'].index)
+    filt = filt.resample('1D').first().dropna(how='all')
+    filt.iloc[-500:] = True
+    rd_analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
+    rd_analysis.filter_params_aggregated['ad_hoc_filter'] = filt
+    with pytest.raises(ValueError, match="Less than two years of data left after filtering"):
+        rd_analysis.sensor_analysis(analyses=['yoy_degradation'])
+
+
 def test_filter_components(sensor_parameters):
     poa = sensor_parameters['poa_global']
     poa_filter = (poa > 200) & (poa < 1200)
     rd_analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
     rd_analysis.sensor_analysis(analyses=['yoy_degradation'])
-    assert (poa_filter == rd_analysis.sensor_filter_components['poa_filter']).all()
+    assert (poa_filter ==
+            rd_analysis.sensor_filter_components['poa_filter']).all()
+
+
+def test_aggregated_filter_components(sensor_parameters):
+    daily_ad_hoc_filter = pd.Series(True,
+                                    index=sensor_parameters['pv'].index)
+    daily_ad_hoc_filter[:600] = False
+    daily_ad_hoc_filter = daily_ad_hoc_filter.resample(
+        '1D').first().dropna(how='all')
+    rd_analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
+    rd_analysis.filter_params = {}  # disable all index-based filters
+    rd_analysis.filter_params_aggregated['ad_hoc_filter'] = daily_ad_hoc_filter
+    rd_analysis.sensor_analysis(analyses=['yoy_degradation'])
+    assert (daily_ad_hoc_filter ==
+            rd_analysis.sensor_filter_components_aggregated['ad_hoc_filter']).all()


 def test_filter_components_no_filters(sensor_parameters):
@@ -196,12 +223,23 @@ def test_filter_components_no_filters(sensor_parameters):
     assert rd_analysis.sensor_filter_components.empty


+def test_aggregated_filter_components_no_filters(sensor_parameters):
+    rd_analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
+    rd_analysis.filter_params = {}  # disable all index-based filters
+    rd_analysis.filter_params_aggregated = {}  # disable all daily filters
+    rd_analysis.sensor_analysis(analyses=['yoy_degradation'])
+    expected = pd.Series(True, index=rd_analysis.pv_energy.index)
+    daily_expected = expected.resample('1D').first().dropna(how='all')
+    pd.testing.assert_series_equal(rd_analysis.sensor_filter_aggregated,
+                                   daily_expected)
+    assert rd_analysis.sensor_filter_components.empty
+
+
 @pytest.mark.parametrize('workflow', ['sensor', 'clearsky'])
 def test_filter_ad_hoc_warnings(workflow, sensor_parameters):
     rd_analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
     rd_analysis.set_clearsky(pvlib_location=pvlib.location.Location(40, -80),
                              poa_global_clearsky=rd_analysis.poa_global)
-
     # warning for incomplete index
     ad_hoc_filter = pd.Series(True, index=sensor_parameters['pv'].index[:-5])
     rd_analysis.filter_params['ad_hoc_filter'] = ad_hoc_filter
@@ -233,6 +271,54 @@ def test_filter_ad_hoc_warnings(workflow, sensor_parameters):
     assert components.drop(components.index[10])['ad_hoc_filter'].all()


+@pytest.mark.parametrize('workflow', ['sensor', 'clearsky'])
+def test_aggregated_filter_ad_hoc_warnings(workflow, sensor_parameters):
+    rd_analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
+    rd_analysis.set_clearsky(pvlib_location=pvlib.location.Location(40, -80),
+                             poa_global_clearsky=rd_analysis.poa_global)
+    # disable all filters outside of CSI
+    rd_analysis.filter_params = {'csi_filter': {}}
+    # warning for incomplete index
+    daily_ad_hoc_filter = pd.Series(True,
+                                    index=sensor_parameters['pv'].index[:-5])
+    daily_ad_hoc_filter = daily_ad_hoc_filter.resample(
+        '1D').first().dropna(how='all')
+    rd_analysis.filter_params_aggregated['ad_hoc_filter'] = daily_ad_hoc_filter
+    with pytest.warns(UserWarning, match='ad_hoc_filter index does not match index'):
+        if workflow == 'sensor':
+            rd_analysis.sensor_analysis(analyses=['yoy_degradation'])
+            components = rd_analysis.sensor_filter_components_aggregated
+        else:
+            rd_analysis.clearsky_analysis(analyses=['yoy_degradation'])
+            components = rd_analysis.clearsky_filter_components_aggregated
+
+    # missing values set to True
+    assert components['ad_hoc_filter'].all()
+
+    # warning about NaNs
+    rd_analysis_2 = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
+    rd_analysis_2.set_clearsky(pvlib_location=pvlib.location.Location(40, -80),
+                               poa_global_clearsky=rd_analysis_2.poa_global)
+    # disable all filters outside of CSI
+    rd_analysis_2.filter_params = {'csi_filter': {}}
+    daily_ad_hoc_filter = pd.Series(True, index=sensor_parameters['pv'].index)
+    daily_ad_hoc_filter = daily_ad_hoc_filter.resample(
+        '1D').first().dropna(how='all')
+    daily_ad_hoc_filter.iloc[10] = np.nan
+    rd_analysis_2.filter_params_aggregated['ad_hoc_filter'] = daily_ad_hoc_filter
+    with pytest.warns(UserWarning, match='ad_hoc_filter contains NaN values; setting to False'):
+        if workflow == 'sensor':
+            rd_analysis_2.sensor_analysis(analyses=['yoy_degradation'])
+            components = rd_analysis_2.sensor_filter_components_aggregated
+        else:
+            rd_analysis_2.clearsky_analysis(analyses=['yoy_degradation'])
+            components = rd_analysis_2.clearsky_filter_components_aggregated
+
+    # NaN values set to False
+    assert not components['ad_hoc_filter'].iloc[10]
+    assert components.drop(components.index[10])['ad_hoc_filter'].all()
+
+
 def test_cell_temperature_model_invalid(sensor_parameters):
     wind = pd.Series(0, index=sensor_parameters['pv'].index)
     sensor_parameters.pop('temperature_model')
@@ -351,8 +437,10 @@ def test_index_mismatch():
     # GH #277
     times = pd.date_range('2019-01-01', '2022-01-01', freq='15min')
     pv = pd.Series(1.0, index=times)
-    dummy_series = pd.Series(1.0, index=times[::4])  # low-frequency weather inputs
-    keys = ['poa_global', 'temperature_cell', 'temperature_ambient', 'power_expected', 'windspeed']
+    # low-frequency weather inputs
+    dummy_series = pd.Series(1.0, index=times[::4])
+    keys = ['poa_global', 'temperature_cell',
+            'temperature_ambient', 'power_expected', 'windspeed']
     kwargs = {key: dummy_series.copy() for key in keys}
     rd_analysis = TrendAnalysis(pv, **kwargs)
     for key in keys:
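For reviewers who want to try the new hook, here is a minimal usage sketch. It mirrors the pattern used in the tests above; `pv` and `poa` are placeholders for the user's own measured series, the omitted temperature/module metadata would be supplied as usual to `TrendAnalysis`, and the 30-day exclusion window is an arbitrary illustration, not part of this change.

```python
import pandas as pd

from rdtools.analysis_chains import TrendAnalysis

# `pv` and `poa` are placeholders for measured AC power and plane-of-array
# irradiance Series on the same sub-daily DatetimeIndex; temperature inputs
# and module parameters are omitted here for brevity.
rd_analysis = TrendAnalysis(pv, poa_global=poa, power_dc_rated=1.0)

# Build a daily boolean mask aligned with the aggregated series, using the
# same resample('1D') pattern as the tests above.
daily_mask = pd.Series(True, index=pv.index)
daily_mask = daily_mask.resample('1D').first().dropna(how='all')
daily_mask.iloc[:30] = False  # arbitrary example: exclude the first 30 days

# The ad hoc mask is ANDed with any other aggregated filters before the
# aggregated performance and insolation series are stored.
rd_analysis.filter_params_aggregated['ad_hoc_filter'] = daily_mask
rd_analysis.sensor_analysis(analyses=['yoy_degradation'])
```

Leaving `filter_params_aggregated` at its default (only `'ad_hoc_filter': None`) keeps the aggregated pass a no-op, so existing workflows are unchanged.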