From 569f27623f58e484582210ffdd81cd8c9f1633d3 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 11 Dec 2024 10:18:41 -0500 Subject: [PATCH 01/15] implementation of initial suggestion --- .gitignore | 1 + rdtools/analysis_chains.py | 5 +++-- rdtools/filtering.py | 37 +++++++++++++++++++++++++++---------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index a289ac22..95b56ca8 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ docs/sphinx/source/generated .eggs/ build/ dist/ +tmp/ rdtools.egg-info* # emacs temp files diff --git a/rdtools/analysis_chains.py b/rdtools/analysis_chains.py index 6ba72042..95330de4 100644 --- a/rdtools/analysis_chains.py +++ b/rdtools/analysis_chains.py @@ -436,7 +436,9 @@ def _pvwatts_norm(self, poa_global, temperature_cell): if renorm: # Normalize to the 95th percentile for convenience, this is renormalized out # in the calculations but is relevant to normalized_filter() - x = energy_normalized[np.isfinite(energy_normalized)] + lower = energy_normalized.fillna(0).quantile(0.95) / 1000 + x = energy_normalized[energy_normalized > lower] + # x = energy_normalized[np.isfinite(energy_normalized)] energy_normalized = energy_normalized / x.quantile(0.95) return energy_normalized, insolation @@ -949,7 +951,6 @@ def sensor_analysis( ------- None """ - self._sensor_preprocess() sensor_results = {} diff --git a/rdtools/filtering.py b/rdtools/filtering.py index e3255b36..0f99c505 100644 --- a/rdtools/filtering.py +++ b/rdtools/filtering.py @@ -294,7 +294,7 @@ def pvlib_clearsky_filter( def clip_filter(power_ac, model="logic", **kwargs): """ Master wrapper for running one of the desired clipping filters. - The default filter run is the quantile clipping filter. + The default filter run is the logic clipping filter. 
Parameters ---------- @@ -335,7 +335,8 @@ def quantile_clip_filter(power_ac, quantile=0.98): """ Filter data points likely to be affected by clipping with power or energy greater than or equal to 99% of the `quant` - quantile. + quantile. NaN's and small values (power_ac(quantile) / 1000) are + removed before calculating clipping threshold. Parameters ---------- @@ -350,8 +351,16 @@ def quantile_clip_filter(power_ac, quantile=0.98): Boolean Series of whether the given measurement is below 99% of the quantile filter. """ - v = power_ac.quantile(quantile) - return power_ac < v * 0.99 + # Replace NaN's and small values for quantile calculation + # This ensures that power series with NaN's instead of zero values + # provide the same result. + lower = power_ac.fillna(0).quantile(quantile) / 1000 + + # Calculate the quantile and upper clipping threshold + q = power_ac[power_ac > lower].quantile(quantile) + upper = q * 0.99 + + return power_ac < upper def _format_clipping_time_series(power_ac, mounting_type): @@ -510,13 +519,18 @@ def _apply_overall_clipping_threshold(power_ac, clipping_mask, clipped_power_ac) periods are labeled as True and non-clipping periods are labeled as False. Has a pandas datetime index. 
""" + + # Ensure that series with NaN's return same results as series with 0's + lower = power_ac.fillna(0).quantile(0.99) / 1000 + power_ac_quant = power_ac[power_ac > lower].quantile(0.99) + upper_bound_pdiff = abs( - (power_ac.quantile(0.99) - clipped_power_ac.quantile(0.99)) - / ((power_ac.quantile(0.99) + clipped_power_ac.quantile(0.99)) / 2) + (power_ac_quant - clipped_power_ac.quantile(0.99)) + / ((power_ac_quant + clipped_power_ac.quantile(0.99)) / 2) ) percent_clipped = len(clipped_power_ac) / len(power_ac) * 100 if (upper_bound_pdiff < 0.005) & (percent_clipped > 4): - max_clip = power_ac >= power_ac.quantile(0.99) + max_clip = power_ac >= power_ac_quant clipping_mask = clipping_mask | max_clip return clipping_mask @@ -642,12 +656,15 @@ def logic_clip_filter( # Set any values within the clipping max + clipping min threshold # as clipping. This is done specifically for capturing the noise # for high frequency data sets. + + # Ensure that time series with zeros and nan's return same result + # lower = clip_pwr.fillna(0).quantile(0.99) / 1000 + # clip_pwr_no_nan = clip_pwr[clip_pwr > lower] + daily_mean = clip_pwr.resample("D").mean() df_daily = daily_mean.to_frame(name="mean") df_daily["clipping_max"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile(0.99) - df_daily["clipping_min"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile( - 0.075 - ) + df_daily["clipping_min"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile(0.075) daily_clipping_max = df_daily["clipping_max"].reindex( index=power_ac_copy.index, method="ffill" ) From 0052423fe2ff11ec6f371700772ac4f8ff1de104 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 11 Dec 2024 10:37:57 -0500 Subject: [PATCH 02/15] update logic filter --- rdtools/filtering.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rdtools/filtering.py b/rdtools/filtering.py index 0f99c505..362defd3 100644 --- a/rdtools/filtering.py +++ b/rdtools/filtering.py @@ -658,13 +658,13 @@ def 
logic_clip_filter( # for high frequency data sets. # Ensure that time series with zeros and nan's return same result - # lower = clip_pwr.fillna(0).quantile(0.99) / 1000 - # clip_pwr_no_nan = clip_pwr[clip_pwr > lower] + lower = clip_pwr.fillna(0).quantile(0.99) / 1000 + clip_pwr_no_nan = clip_pwr[clip_pwr > lower] - daily_mean = clip_pwr.resample("D").mean() + daily_mean = clip_pwr_no_nan.resample("D").mean() df_daily = daily_mean.to_frame(name="mean") - df_daily["clipping_max"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile(0.99) - df_daily["clipping_min"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile(0.075) + df_daily["clipping_max"] = clip_pwr_no_nan.groupby(pd.Grouper(freq="D")).quantile(0.99) + df_daily["clipping_min"] = clip_pwr_no_nan.groupby(pd.Grouper(freq="D")).quantile(0.075) daily_clipping_max = df_daily["clipping_max"].reindex( index=power_ac_copy.index, method="ffill" ) From 1670fc618e9d5411e29627116a5e8aab87ce913b Mon Sep 17 00:00:00 2001 From: martin-springer Date: Thu, 12 Dec 2024 13:56:57 -0500 Subject: [PATCH 03/15] introduce robust utility functions --- rdtools/__init__.py | 3 ++ rdtools/degradation.py | 3 +- rdtools/filtering.py | 42 ++++++++-------------- rdtools/utilities.py | 79 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 28 deletions(-) create mode 100644 rdtools/utilities.py diff --git a/rdtools/__init__.py b/rdtools/__init__.py index 03ea0288..2ef39307 100644 --- a/rdtools/__init__.py +++ b/rdtools/__init__.py @@ -36,6 +36,9 @@ # from rdtools.plotting import soiling_rate_histogram # from rdtools.plotting import availability_summary_plots # from rdtools.availability import AvailabilityAnalysis +from rdtools.utilities import robust_quantile +from rdtools.utilities import robust_median +from rdtools.utilities import robust_mean from . 
import _version __version__ = _version.get_versions()['version'] diff --git a/rdtools/degradation.py b/rdtools/degradation.py index ba50476e..4bd9ba57 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -5,6 +5,7 @@ import statsmodels.api as sm from rdtools.bootstrap import _make_time_series_bootstrap_samples, \ _construct_confidence_intervals +from rdtools import utilities def degradation_ols(energy_normalized, confidence_level=68.2): @@ -259,7 +260,7 @@ def degradation_year_on_year(energy_normalized, recenter=True, if recenter: start = energy_normalized.index[0] oneyear = start + pd.Timedelta('364d') - renorm = energy_normalized[start:oneyear].median() + renorm = utilities.robust_median(energy_normalized[start:oneyear]) else: renorm = 1.0 diff --git a/rdtools/filtering.py b/rdtools/filtering.py index 362defd3..5d7362fb 100644 --- a/rdtools/filtering.py +++ b/rdtools/filtering.py @@ -8,6 +8,7 @@ from scipy.interpolate import interp1d import rdtools import xgboost as xgb +from rdtools import utilities # Load in the XGBoost clipping model using joblib. xgboost_clipping_model = None @@ -335,8 +336,7 @@ def quantile_clip_filter(power_ac, quantile=0.98): """ Filter data points likely to be affected by clipping with power or energy greater than or equal to 99% of the `quant` - quantile. NaN's and small values (power_ac(quantile) / 1000) are - removed before calculating clipping threshold. + quantile. Parameters ---------- @@ -351,16 +351,8 @@ def quantile_clip_filter(power_ac, quantile=0.98): Boolean Series of whether the given measurement is below 99% of the quantile filter. """ - # Replace NaN's and small values for quantile calculation - # This ensures that power series with NaN's instead of zero values - # provide the same result. 
- lower = power_ac.fillna(0).quantile(quantile) / 1000 - - # Calculate the quantile and upper clipping threshold - q = power_ac[power_ac > lower].quantile(quantile) - upper = q * 0.99 - - return power_ac < upper + v = utilities.robust_quantile(power_ac, quantile) + return power_ac < v * 0.99 def _format_clipping_time_series(power_ac, mounting_type): @@ -519,18 +511,15 @@ def _apply_overall_clipping_threshold(power_ac, clipping_mask, clipped_power_ac) periods are labeled as True and non-clipping periods are labeled as False. Has a pandas datetime index. """ - - # Ensure that series with NaN's return same results as series with 0's - lower = power_ac.fillna(0).quantile(0.99) / 1000 - power_ac_quant = power_ac[power_ac > lower].quantile(0.99) + q_power_ac = utilities.robust_quantile(power_ac, 0.99) + q_clipped_power_ac = utilities.robust_quantile(clipped_power_ac, 0.99) upper_bound_pdiff = abs( - (power_ac_quant - clipped_power_ac.quantile(0.99)) - / ((power_ac_quant + clipped_power_ac.quantile(0.99)) / 2) + (q_power_ac - q_clipped_power_ac) / ((q_power_ac + q_clipped_power_ac) / 2) ) percent_clipped = len(clipped_power_ac) / len(power_ac) * 100 if (upper_bound_pdiff < 0.005) & (percent_clipped > 4): - max_clip = power_ac >= power_ac_quant + max_clip = power_ac >= q_power_ac clipping_mask = clipping_mask | max_clip return clipping_mask @@ -656,15 +645,14 @@ def logic_clip_filter( # Set any values within the clipping max + clipping min threshold # as clipping. This is done specifically for capturing the noise # for high frequency data sets. 
- - # Ensure that time series with zeros and nan's return same result - lower = clip_pwr.fillna(0).quantile(0.99) / 1000 - clip_pwr_no_nan = clip_pwr[clip_pwr > lower] - - daily_mean = clip_pwr_no_nan.resample("D").mean() + daily_mean = clip_pwr.resample("D").mean() df_daily = daily_mean.to_frame(name="mean") - df_daily["clipping_max"] = clip_pwr_no_nan.groupby(pd.Grouper(freq="D")).quantile(0.99) - df_daily["clipping_min"] = clip_pwr_no_nan.groupby(pd.Grouper(freq="D")).quantile(0.075) + df_daily["clipping_max"] = clip_pwr.groupby(pd.Grouper(freq="D")).agg( + utilities.robust_quantile, q=0.99 + ) + df_daily["clipping_min"] = clip_pwr.groupby(pd.Grouper(freq="D")).agg( + utilities.robust_quantile, q=0.075 + ) daily_clipping_max = df_daily["clipping_max"].reindex( index=power_ac_copy.index, method="ffill" ) diff --git a/rdtools/utilities.py b/rdtools/utilities.py new file mode 100644 index 00000000..f599237b --- /dev/null +++ b/rdtools/utilities.py @@ -0,0 +1,79 @@ +"""Utility functions for rdtools.""" + + +def robust_quantile(x, q): + """ + Compute the q-th quantile of a time series (x), ignoring small values and NaN's. + NaN's and small values [x < Q(x,q)/1000] are removed before calculating the quantile. + This function ensures that time series with NaN's and distributions without + NaN's return the same results. + + Parameters + ---------- + x : pandas.Series + Input time series. + q : float + Probability value. + + Returns + ------- + quantile : float + The q-th quantile of x, ignoring small values and NaN's. + """ + + small = x.fillna(0).quantile(q) / 1000 + q = x[x > small].quantile(q) + + return q + + +def robust_median(x, q=0.99): + """ + Compute the median of a time series (x), ignoring small values and NaN's. + NaN's and small values [Q(x,q)/1000] are removed before calculating the mean. + This function ensures that time series with NaN's and distributions without + NaN's return the same results. 
+ + Parameters + ---------- + x : pandas.Series + Input time series. + q : float, default 0.99 + Probability value to use for the small values threshold calculation [Q(x,q)/1000]. + + Returns + ------- + quantile : float + The q-th quantile of x, ignoring small values and NaN's. + """ + + small = x.fillna(0).quantile(q) / 1000 + mdn = x[x > small].median() + + return mdn + + +def robust_mean(x, q=0.99): + """ + Compute the mean of a time series (x), ignoring small values and NaN's. + NaN's and small values [x < Q(x,q)/1000] are removed before calculating the mean. + This function ensures that time series with NaN's and distributions without + NaN's return the same results. + + Parameters + ---------- + x : pandas.Series + Input time series. + q : float, default 0.99 + Probability value to use for the small values threshold calculation. + + Returns + ------- + quantile : float + The q-th quantile of x, ignoring small values and NaN's. + """ + + small = x.fillna(0).quantile(q) / 1000 + m = x[x > small].mean() + + return m From 523ef2893e41828b1930ff44005bfe9f9f896cca Mon Sep 17 00:00:00 2001 From: martin-springer Date: Thu, 12 Dec 2024 15:33:39 -0500 Subject: [PATCH 04/15] fix downcasting warning --- rdtools/utilities.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rdtools/utilities.py b/rdtools/utilities.py index f599237b..0fb9f3f7 100644 --- a/rdtools/utilities.py +++ b/rdtools/utilities.py @@ -21,7 +21,7 @@ def robust_quantile(x, q): The q-th quantile of x, ignoring small values and NaN's. """ - small = x.fillna(0).quantile(q) / 1000 + small = x.astype(float).fillna(0).quantile(q) / 1000 q = x[x > small].quantile(q) return q @@ -47,7 +47,7 @@ def robust_median(x, q=0.99): The q-th quantile of x, ignoring small values and NaN's. 
""" - small = x.fillna(0).quantile(q) / 1000 + small = x.astype(float).fillna(0).quantile(q) / 1000 mdn = x[x > small].median() return mdn @@ -73,7 +73,7 @@ def robust_mean(x, q=0.99): The q-th quantile of x, ignoring small values and NaN's. """ - small = x.fillna(0).quantile(q) / 1000 + small = x.astype(float).fillna(0).quantile(q) / 1000 m = x[x > small].mean() return m From 690e7773077782c56ef14571b81203fc539a3c11 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Thu, 12 Dec 2024 15:54:37 -0500 Subject: [PATCH 05/15] add unit tests --- rdtools/test/utilities_test.py | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 rdtools/test/utilities_test.py diff --git a/rdtools/test/utilities_test.py b/rdtools/test/utilities_test.py new file mode 100644 index 00000000..b20e7255 --- /dev/null +++ b/rdtools/test/utilities_test.py @@ -0,0 +1,43 @@ +import pandas as pd +import numpy as np +import pytest +from rdtools.utilities import robust_quantile, robust_median, robust_mean + + +@pytest.fixture +def data(): + data_zeros = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + data_nan = pd.Series([np.nan, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + return data_zeros, data_nan + + +def test_robust_quantile(data): + data_zeros, data_nan = data + quantile = 0.5 + expected_result = 5.5 + assert expected_result == robust_quantile(data_zeros, quantile) + assert expected_result == robust_quantile(data_nan, quantile) + + quantile = 0.25 + expected_result = 3.25 + assert expected_result == robust_quantile(data_zeros, quantile) + assert expected_result == robust_quantile(data_nan, quantile) + + quantile = 0.75 + expected_result = 7.75 + assert expected_result == robust_quantile(data_zeros, quantile) + assert expected_result == robust_quantile(data_nan, quantile) + + +def test_robust_median(data): + data_zeros, data_nan = data + expected_result = 5.5 + assert expected_result == robust_median(data_zeros) + assert expected_result == robust_median(data_nan) + + 
+def test_robust_mean(data): + data_zeros, data_nan = data + expected_result = 5.5 + assert expected_result == robust_mean(data_zeros) + assert expected_result == robust_mean(data_nan) From e1e4b52623248c2b11be16c4d497f7c2de65c619 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 09:26:20 -0500 Subject: [PATCH 06/15] move changes to pending changlog --- docs/sphinx/source/changelog.rst | 1 + docs/sphinx/source/changelog/pending.rst | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 docs/sphinx/source/changelog/pending.rst diff --git a/docs/sphinx/source/changelog.rst b/docs/sphinx/source/changelog.rst index 54085c79..7fef9d35 100644 --- a/docs/sphinx/source/changelog.rst +++ b/docs/sphinx/source/changelog.rst @@ -1,5 +1,6 @@ RdTools Change Log ================== +.. include:: changelog/pending.rst .. include:: changelog/v3.0.0-beta.0.rst .. include:: changelog/v2.2.0-beta.2.rst .. include:: changelog/v2.2.0-beta.1.rst diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst new file mode 100644 index 00000000..5d45c2e9 --- /dev/null +++ b/docs/sphinx/source/changelog/pending.rst @@ -0,0 +1,8 @@ +************************** +v3.0.0 (December XX, 2024) +************************** + + +Bug fixes +--------- +* Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`) \ No newline at end of file From dc373e08a1eea9fa7ac01441545485d7f61238e7 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 09:37:14 -0500 Subject: [PATCH 07/15] move changes to pending changelog 3 --- docs/sphinx/source/changelog/pending.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst index 5d45c2e9..2f32d3a0 100644 --- a/docs/sphinx/source/changelog/pending.rst +++ b/docs/sphinx/source/changelog/pending.rst @@ -2,7 +2,13 @@ v3.0.0 (December XX, 2024) 
************************** +Enhancements +------------ +* Add `CITATION.cff` file for citation information (:pull:`434`) +* Added checks to TrendAnalysis for `filter_params` and `filter_params_aggregated`. Raises an error if unknown filter is supplied. (:pull:`436`) + Bug fixes --------- -* Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`) \ No newline at end of file +* Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`) + From 05625eb00b3acd857d99f4ee69c1fd4f4d3baa64 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 09:49:00 -0500 Subject: [PATCH 08/15] move changes to pending changelog 4 --- docs/sphinx/source/changelog/pending.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst index 2f32d3a0..06e34d3f 100644 --- a/docs/sphinx/source/changelog/pending.rst +++ b/docs/sphinx/source/changelog/pending.rst @@ -11,4 +11,5 @@ Enhancements Bug fixes --------- * Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`) +* Fix `energy_from_power`` returns incorrect index for shifted hourly data (:issue:`370`, :pull:`437`) From 0c0f19bd2c2b830b16a0d610680777f74d527622 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 10:12:40 -0500 Subject: [PATCH 09/15] move changes to pending changelog 5 --- docs/sphinx/source/changelog/pending.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst index 06e34d3f..c999c1f1 100644 --- a/docs/sphinx/source/changelog/pending.rst +++ b/docs/sphinx/source/changelog/pending.rst @@ -13,3 +13,8 @@ Bug fixes * Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`) * Fix `energy_from_power`` returns incorrect index for shifted hourly data (:issue:`370`, :pull:`437`) + +Requirements 
+------------ +* Updated tornado==6.4.2 in ``notebook_requirements.txt`` (:pull:`438`) + From 11b76c62e56cf427e238e4f14634c141fae97764 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 10:15:23 -0500 Subject: [PATCH 10/15] move changes to pending changelog 6 --- docs/sphinx/source/changelog/pending.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst index c999c1f1..12766af0 100644 --- a/docs/sphinx/source/changelog/pending.rst +++ b/docs/sphinx/source/changelog/pending.rst @@ -12,6 +12,7 @@ Bug fixes --------- * Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`) * Fix `energy_from_power`` returns incorrect index for shifted hourly data (:issue:`370`, :pull:`437`) +* Add warning to clearsky workflow when power_expected is passed by user (:pull:`439`) Requirements From 829a06fe74014ac4f6d88300f51cb414570d52f9 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 10:18:29 -0500 Subject: [PATCH 11/15] move changes to pending changelog 7 --- docs/sphinx/source/changelog/pending.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst index 12766af0..8497d34e 100644 --- a/docs/sphinx/source/changelog/pending.rst +++ b/docs/sphinx/source/changelog/pending.rst @@ -19,3 +19,8 @@ Requirements ------------ * Updated tornado==6.4.2 in ``notebook_requirements.txt`` (:pull:`438`) + +Tests +----- +* Add tests for pvlib clearsky filter in analysis chain (:pull:`441`) + From 966a3f65ff73aadc27059c59ee10370414c50f2e Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 12:31:43 -0500 Subject: [PATCH 12/15] update code to utility function --- rdtools/analysis_chains.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/rdtools/analysis_chains.py b/rdtools/analysis_chains.py index 95330de4..6c28b370
100644 --- a/rdtools/analysis_chains.py +++ b/rdtools/analysis_chains.py @@ -8,7 +8,7 @@ import numpy as np import matplotlib.pyplot as plt from rdtools import normalization, filtering, aggregation, degradation -from rdtools import clearsky_temperature, plotting +from rdtools import clearsky_temperature, plotting, utilities import warnings @@ -436,10 +436,9 @@ def _pvwatts_norm(self, poa_global, temperature_cell): if renorm: # Normalize to the 95th percentile for convenience, this is renormalized out # in the calculations but is relevant to normalized_filter() - lower = energy_normalized.fillna(0).quantile(0.95) / 1000 - x = energy_normalized[energy_normalized > lower] - # x = energy_normalized[np.isfinite(energy_normalized)] - energy_normalized = energy_normalized / x.quantile(0.95) + q = utilities.robust_quantile(energy_normalized[np.isfinite(energy_normalized)], 0.95) + + energy_normalized = energy_normalized / q return energy_normalized, insolation From 55cfae0479c63d3c1e44790009d2d8937e5154d4 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 12:47:26 -0500 Subject: [PATCH 13/15] test_sensor_analysis_nans --- rdtools/test/analysis_chains_test.py | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/rdtools/test/analysis_chains_test.py b/rdtools/test/analysis_chains_test.py index efe78df4..8f2f9156 100644 --- a/rdtools/test/analysis_chains_test.py +++ b/rdtools/test/analysis_chains_test.py @@ -67,6 +67,48 @@ def sensor_analysis(sensor_parameters): return rd_analysis +@pytest.fixture +def sensor_analysis_nans(sensor_parameters): + def randomly_replace_with(series, replace_with=0, fraction=0.1, seed=None): + """ + Randomly replace a fraction of entries in a pandas Series with input value `replace_with`. + + Parameters: + series (pd.Series): The input pandas Series. + fraction (float): The fraction of entries to replace with 0. Default is 0.1 (10%). 
+ seed (int, optional): Seed for the random number generator for reproducibility. + + Returns: + pd.Series: The modified pandas Series with some entries replaced by 0. + """ + if seed is not None: + np.random.seed(seed) + + # Determine the number of entries to replace + n_replace = int(len(series) * fraction) + + # Randomly select indices to replace + replace_indices = np.random.choice(series.index, size=n_replace, replace=False) + + # Replace selected entries with + series.loc[replace_indices] = replace_with + + return series + + sensor_parameters_zeros = sensor_parameters.copy() + sensor_parameters_nans = sensor_parameters.copy() + + sensor_parameters_zeros["pv"] = randomly_replace_with(sensor_parameters["pv"], seed=0) + sensor_parameters_nans["pv"] = sensor_parameters_zeros["pv"].replace(0, np.nan) + + rd_analysis_zeros = TrendAnalysis(**sensor_parameters_zeros) + rd_analysis_zeros.sensor_analysis(analyses=["yoy_degradation"]) + + rd_analysis_nans = TrendAnalysis(**sensor_parameters_nans) + rd_analysis_nans.sensor_analysis(analyses=["yoy_degradation"]) + return rd_analysis_zeros, rd_analysis_nans + + @pytest.fixture def sensor_analysis_exp_power(sensor_parameters): power_expected = normalization.pvwatts_dc_power( @@ -155,6 +197,21 @@ def test_sensor_analysis(sensor_analysis): assert [-1, -1] == pytest.approx(ci, abs=1e-2) +def test_sensor_analysis_nans(sensor_analysis_nans): + rd_analysis_zeros, rd_analysis_nans = sensor_analysis_nans + + yoy_results_zeros = rd_analysis_zeros.results["sensor"]["yoy_degradation"] + rd_zeros = yoy_results_zeros["p50_rd"] + ci_zeros = yoy_results_zeros["rd_confidence_interval"] + + yoy_results_nans = rd_analysis_nans.results["sensor"]["yoy_degradation"] + rd_nans = yoy_results_nans["p50_rd"] + ci_nans = yoy_results_nans["rd_confidence_interval"] + + assert rd_zeros == pytest.approx(rd_nans, abs=1e-2) + assert ci_zeros == pytest.approx(ci_nans, abs=1e-1) + + def test_sensor_analysis_filter_components(sensor_analysis): columns = 
sensor_analysis.sensor_filter_components_aggregated.columns assert {'two_way_window_filter'} == set(columns) From 7f00aaf0e49a2ed27cfb577cc4e2106a36ab54c6 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 12:49:06 -0500 Subject: [PATCH 14/15] update changelog --- docs/sphinx/source/changelog/pending.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst index 8497d34e..28e56a5c 100644 --- a/docs/sphinx/source/changelog/pending.rst +++ b/docs/sphinx/source/changelog/pending.rst @@ -13,6 +13,7 @@ Bug fixes * Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`) * Fix `energy_from_power`` returns incorrect index for shifted hourly data (:issue:`370`, :pull:`437`) * Add warning to clearsky workflow when power_expected is passed by user (:pull:`439`) +* Fix different results with Nan's and Zeros in power series (:issue:`313`, :pull:`442`) Requirements From 43dffd48f579eac13a67b67eedaaa9efa0d941c3 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 13:27:01 -0500 Subject: [PATCH 15/15] change nbval workflow command? --- .github/workflows/nbval.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nbval.yaml b/.github/workflows/nbval.yaml index abc712ae..e014b494 100644 --- a/.github/workflows/nbval.yaml +++ b/.github/workflows/nbval.yaml @@ -29,7 +29,7 @@ jobs: - name: Run notebook and check output run: | # --sanitize-with: pre-process text to remove irrelevant differences (e.g. warning filepaths) - pytest --nbval --sanitize-with docs/nbval_sanitization_rules.cfg docs/${{ matrix.notebook-file }} + pytest --nbval docs/${{ matrix.notebook-file }} --sanitize-with docs/nbval_sanitization_rules.cfg - name: Run notebooks again, save files run: | pip install nbconvert[webpdf]