Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue #313 (different results with Nan's and Zeros in power series) #442

Open
wants to merge 17 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/nbval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
- name: Run notebook and check output
run: |
# --sanitize-with: pre-process text to remove irrelevant differences (e.g. warning filepaths)
pytest --nbval --sanitize-with docs/nbval_sanitization_rules.cfg docs/${{ matrix.notebook-file }}
pytest --nbval docs/${{ matrix.notebook-file }} --sanitize-with docs/nbval_sanitization_rules.cfg
- name: Run notebooks again, save files
run: |
pip install nbconvert[webpdf]
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ docs/sphinx/source/generated
.eggs/
build/
dist/
tmp/
rdtools.egg-info*

# emacs temp files
Expand Down
1 change: 1 addition & 0 deletions docs/sphinx/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
RdTools Change Log
==================
.. include:: changelog/pending.rst
.. include:: changelog/v3.0.0-beta.0.rst
.. include:: changelog/v2.2.0-beta.2.rst
.. include:: changelog/v2.2.0-beta.1.rst
Expand Down
27 changes: 27 additions & 0 deletions docs/sphinx/source/changelog/pending.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
**************************
v3.0.0 (December XX, 2024)
**************************

Enhancements
------------
* Add `CITATION.cff` file for citation information (:pull:`434`)
* Added checks to TrendAnalysis for `filter_params` and `filter_params_aggregated`. Raises an error if unkown filter is supplied. (:pull:`436`)


Bug fixes
---------
* Set marker linewidth to zero in `rdtools.plotting.degradation_summary_plots` (:pull:`433`)
* Fix `energy_from_power`` returns incorrect index for shifted hourly data (:issue:`370`, :pull:`437`)
* Add warning to clearsky workflow when power_expected is passed by user (:pull:`439`)
* Fix different results with Nan's and Zeros in power series (:issue:`313`, :pull:`442`)


Requirements
------------
* Updated tornado==6.4.2 in ``notebook_requirements.txt`` (:pull:`438`)


Tests
-----
* Add tests for pvlib clearsky fiter in analysis chain (:pull:`441`)

3 changes: 3 additions & 0 deletions rdtools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
# from rdtools.plotting import soiling_rate_histogram
# from rdtools.plotting import availability_summary_plots
# from rdtools.availability import AvailabilityAnalysis
from rdtools.utilities import robust_quantile
from rdtools.utilities import robust_median
from rdtools.utilities import robust_mean

from . import _version
__version__ = _version.get_versions()['version']
8 changes: 4 additions & 4 deletions rdtools/analysis_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import matplotlib.pyplot as plt
from rdtools import normalization, filtering, aggregation, degradation
from rdtools import clearsky_temperature, plotting
from rdtools import clearsky_temperature, plotting, utilities
import warnings


Expand Down Expand Up @@ -436,8 +436,9 @@ def _pvwatts_norm(self, poa_global, temperature_cell):
if renorm:
# Normalize to the 95th percentile for convenience, this is renormalized out
# in the calculations but is relevant to normalized_filter()
x = energy_normalized[np.isfinite(energy_normalized)]
energy_normalized = energy_normalized / x.quantile(0.95)
q = utilities.robust_quantile(energy_normalized[np.isfinite(energy_normalized)], 0.95)

energy_normalized = energy_normalized / q

return energy_normalized, insolation

Expand Down Expand Up @@ -949,7 +950,6 @@ def sensor_analysis(
-------
None
"""

self._sensor_preprocess()
sensor_results = {}

Expand Down
3 changes: 2 additions & 1 deletion rdtools/degradation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import statsmodels.api as sm
from rdtools.bootstrap import _make_time_series_bootstrap_samples, \
_construct_confidence_intervals
from rdtools import utilities


def degradation_ols(energy_normalized, confidence_level=68.2):
Expand Down Expand Up @@ -259,7 +260,7 @@ def degradation_year_on_year(energy_normalized, recenter=True,
if recenter:
start = energy_normalized.index[0]
oneyear = start + pd.Timedelta('364d')
renorm = energy_normalized[start:oneyear].median()
renorm = utilities.robust_median(energy_normalized[start:oneyear])
else:
renorm = 1.0

Expand Down
21 changes: 13 additions & 8 deletions rdtools/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from scipy.interpolate import interp1d
import rdtools
import xgboost as xgb
from rdtools import utilities

# Load in the XGBoost clipping model using joblib.
xgboost_clipping_model = None
Expand Down Expand Up @@ -294,7 +295,7 @@ def pvlib_clearsky_filter(
def clip_filter(power_ac, model="logic", **kwargs):
"""
Master wrapper for running one of the desired clipping filters.
The default filter run is the quantile clipping filter.
The default filter run is the logic clipping filter.

Parameters
----------
Expand Down Expand Up @@ -350,7 +351,7 @@ def quantile_clip_filter(power_ac, quantile=0.98):
Boolean Series of whether the given measurement is below 99% of the
quantile filter.
"""
v = power_ac.quantile(quantile)
v = utilities.robust_quantile(power_ac, quantile)
return power_ac < v * 0.99


Expand Down Expand Up @@ -510,13 +511,15 @@ def _apply_overall_clipping_threshold(power_ac, clipping_mask, clipped_power_ac)
periods are labeled as True and non-clipping periods are
labeled as False. Has a pandas datetime index.
"""
q_power_ac = utilities.robust_quantile(power_ac, 0.99)
q_clipped_power_ac = utilities.robust_quantile(clipped_power_ac, 0.99)

upper_bound_pdiff = abs(
(power_ac.quantile(0.99) - clipped_power_ac.quantile(0.99))
/ ((power_ac.quantile(0.99) + clipped_power_ac.quantile(0.99)) / 2)
(q_power_ac - q_clipped_power_ac) / ((q_power_ac + q_clipped_power_ac) / 2)
)
percent_clipped = len(clipped_power_ac) / len(power_ac) * 100
if (upper_bound_pdiff < 0.005) & (percent_clipped > 4):
max_clip = power_ac >= power_ac.quantile(0.99)
max_clip = power_ac >= q_power_ac
clipping_mask = clipping_mask | max_clip
return clipping_mask

Expand Down Expand Up @@ -644,9 +647,11 @@ def logic_clip_filter(
# for high frequency data sets.
daily_mean = clip_pwr.resample("D").mean()
df_daily = daily_mean.to_frame(name="mean")
df_daily["clipping_max"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile(0.99)
df_daily["clipping_min"] = clip_pwr.groupby(pd.Grouper(freq="D")).quantile(
0.075
df_daily["clipping_max"] = clip_pwr.groupby(pd.Grouper(freq="D")).agg(
utilities.robust_quantile, q=0.99
)
df_daily["clipping_min"] = clip_pwr.groupby(pd.Grouper(freq="D")).agg(
utilities.robust_quantile, q=0.075
)
daily_clipping_max = df_daily["clipping_max"].reindex(
index=power_ac_copy.index, method="ffill"
Expand Down
57 changes: 57 additions & 0 deletions rdtools/test/analysis_chains_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,48 @@ def sensor_analysis(sensor_parameters):
return rd_analysis


@pytest.fixture
def sensor_analysis_nans(sensor_parameters):
def randomly_replace_with(series, replace_with=0, fraction=0.1, seed=None):
"""
Randomly replace a fraction of entries in a pandas Series with input value `replace_with`.

Parameters:
series (pd.Series): The input pandas Series.
fraction (float): The fraction of entries to replace with 0. Default is 0.1 (10%).
seed (int, optional): Seed for the random number generator for reproducibility.

Returns:
pd.Series: The modified pandas Series with some entries replaced by 0.
"""
if seed is not None:
np.random.seed(seed)

# Determine the number of entries to replace
n_replace = int(len(series) * fraction)

# Randomly select indices to replace
replace_indices = np.random.choice(series.index, size=n_replace, replace=False)

# Replace selected entries with
series.loc[replace_indices] = replace_with

return series

sensor_parameters_zeros = sensor_parameters.copy()
sensor_parameters_nans = sensor_parameters.copy()

sensor_parameters_zeros["pv"] = randomly_replace_with(sensor_parameters["pv"], seed=0)
sensor_parameters_nans["pv"] = sensor_parameters_zeros["pv"].replace(0, np.nan)

rd_analysis_zeros = TrendAnalysis(**sensor_parameters_zeros)
rd_analysis_zeros.sensor_analysis(analyses=["yoy_degradation"])

rd_analysis_nans = TrendAnalysis(**sensor_parameters_nans)
rd_analysis_nans.sensor_analysis(analyses=["yoy_degradation"])
return rd_analysis_zeros, rd_analysis_nans


@pytest.fixture
def sensor_analysis_exp_power(sensor_parameters):
power_expected = normalization.pvwatts_dc_power(
Expand Down Expand Up @@ -155,6 +197,21 @@ def test_sensor_analysis(sensor_analysis):
assert [-1, -1] == pytest.approx(ci, abs=1e-2)


def test_sensor_analysis_nans(sensor_analysis_nans):
rd_analysis_zeros, rd_analysis_nans = sensor_analysis_nans

yoy_results_zeros = rd_analysis_zeros.results["sensor"]["yoy_degradation"]
rd_zeros = yoy_results_zeros["p50_rd"]
ci_zeros = yoy_results_zeros["rd_confidence_interval"]

yoy_results_nans = rd_analysis_nans.results["sensor"]["yoy_degradation"]
rd_nans = yoy_results_nans["p50_rd"]
ci_nans = yoy_results_nans["rd_confidence_interval"]

assert rd_zeros == pytest.approx(rd_nans, abs=1e-2)
assert ci_zeros == pytest.approx(ci_nans, abs=1e-1)


def test_sensor_analysis_filter_components(sensor_analysis):
columns = sensor_analysis.sensor_filter_components_aggregated.columns
assert {'two_way_window_filter'} == set(columns)
Expand Down
43 changes: 43 additions & 0 deletions rdtools/test/utilities_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pandas as pd
import numpy as np
import pytest
from rdtools.utilities import robust_quantile, robust_median, robust_mean


@pytest.fixture
def data():
data_zeros = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
data_nan = pd.Series([np.nan, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
return data_zeros, data_nan


def test_robust_quantile(data):
data_zeros, data_nan = data
quantile = 0.5
expected_result = 5.5
assert expected_result == robust_quantile(data_zeros, quantile)
assert expected_result == robust_quantile(data_nan, quantile)

quantile = 0.25
expected_result = 3.25
assert expected_result == robust_quantile(data_zeros, quantile)
assert expected_result == robust_quantile(data_nan, quantile)

quantile = 0.75
expected_result = 7.75
assert expected_result == robust_quantile(data_zeros, quantile)
assert expected_result == robust_quantile(data_nan, quantile)


def test_robust_median(data):
data_zeros, data_nan = data
expected_result = 5.5
assert expected_result == robust_median(data_zeros)
assert expected_result == robust_median(data_nan)


def test_robust_mean(data):
data_zeros, data_nan = data
expected_result = 5.5
assert expected_result == robust_mean(data_zeros)
assert expected_result == robust_mean(data_nan)
79 changes: 79 additions & 0 deletions rdtools/utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Utility functions for rdtools."""


def robust_quantile(x, q):
"""
Compute the q-th quantile of a time series (x), ignoring small values and NaN's.
NaN's and small values [x < Q(x,q)/1000] are removed before calculating the quantile.
This function ensures that time series with NaN's and distributions without
NaN's return the same results.

Parameters
----------
x : pandas.Series
Input time series.
q : float
Probability value.

Returns
-------
quantile : float
The q-th quantile of x, ignoring small values and NaN's.
"""

small = x.astype(float).fillna(0).quantile(q) / 1000
q = x[x > small].quantile(q)

return q


def robust_median(x, q=0.99):
"""
Compute the median of a time series (x), ignoring small values and NaN's.
NaN's and small values [Q(x,q)/1000] are removed before calculating the mean.
This function ensures that time series with NaN's and distributions without
NaN's return the same results.

Parameters
----------
x : pandas.Series
Input time series.
q : float, default 0.99
Probability value to use for the small values threshold calculation [Q(x,q)/1000].

Returns
-------
quantile : float
The q-th quantile of x, ignoring small values and NaN's.
"""

small = x.astype(float).fillna(0).quantile(q) / 1000
mdn = x[x > small].median()

return mdn


def robust_mean(x, q=0.99):
"""
Compute the mean of a time series (x), ignoring small values and NaN's.
NaN's and small values [x < Q(x,q)/1000] are removed before calculating the mean.
This function ensures that time series with NaN's and distributions without
NaN's return the same results.

Parameters
----------
x : pandas.Series
Input time series.
q : float, default 0.99
Probability value to use for the small values threshold calculation.

Returns
-------
quantile : float
The q-th quantile of x, ignoring small values and NaN's.
"""

small = x.astype(float).fillna(0).quantile(q) / 1000
m = x[x > small].mean()

return m
Loading