Skip to content

Commit

Permalink
Merge branch 'aggregated_filters_for_trials' into update-codecov-config
Browse files Browse the repository at this point in the history
  • Loading branch information
mdeceglie committed Jul 3, 2024
2 parents f905dac + 6ef4a0e commit bdef714
Show file tree
Hide file tree
Showing 12 changed files with 146 additions and 329 deletions.
6 changes: 5 additions & 1 deletion docs/sphinx/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ Filtering
poa_filter
tcell_filter
normalized_filter
two_way_window_filter
insolation_filter
hampel_filter
directional_tukey_filter
hour_angle_filter


Normalization
Expand All @@ -127,7 +132,6 @@ Normalization
irradiance_rescale
normalize_with_expected_power
normalize_with_pvwatts
normalize_with_sapm
pvwatts_dc_power
sapm_dc_power
delta_index
Expand Down
7 changes: 6 additions & 1 deletion docs/sphinx/source/changelog/pending.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ Requirements
------------
* Specified versions in ``requirements.txt`` and ``docs/notebook_requirements.txt`` have been updated (:pull:`412`)

Deprecations
------------
* Removed :py:func:`~rdtools.normalization.sapm_dc_power` (:pull:`419`)
* Removed :py:func:`~rdtools.normalization.normalize_with_sapm` (:pull:`419`)

Contributors
------------
* Martin Springer (:ghuser:`martin-springer`)
* Michael Deceglie (:ghuser:`mdeceglie`)
* Michael Deceglie (:ghuser:`mdeceglie`)
1 change: 0 additions & 1 deletion rdtools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from rdtools.normalization import normalize_with_sapm
from rdtools.normalization import normalize_with_pvwatts
from rdtools.normalization import irradiance_rescale
from rdtools.normalization import energy_from_power
Expand Down
13 changes: 7 additions & 6 deletions rdtools/analysis_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ class TrendAnalysis:
filter_params_aggregated: dict
parameters to be passed to rdtools.filtering functions that specifically handle
aggregated data (daily filters, etc). Keys are the names of the rdtools.filtering functions.
Values are dicts of parameters to be passed to those functions. Also has a special key
Values are dicts of parameters to be passed to those functions. To invoke `clearsky_filter`
for a sensor analysis, use the special key `sensor_clearsky_filter`. Also has a special key
`ad_hoc_filter`; this filter is a boolean mask joined with the rest of the filters.
filter_params_aggregated defaults to empty dicts for each function in rdtools.filtering,
in which case those functions use default parameter values, `ad_hoc_filter`
Expand Down Expand Up @@ -532,11 +533,11 @@ def _call_clearsky_filter(filter_string):
filter_components["clearsky_filter"] = _call_clearsky_filter(
"clearsky_filter"
)
# TODO: Ask Mike about this section
# if "sensor_clearsky_filter" in self.filter_params:
# filter_components["sensor_clearsky_filter"] = _call_clearsky_filter(
# "sensor_clearsky_filter"
# )

if "sensor_clearsky_filter" in self.filter_params:
filter_components["sensor_clearsky_filter"] = _call_clearsky_filter(
"sensor_clearsky_filter"
)

# note: the previous implementation using the & operator treated NaN
# filter values as False, so we do the same here for consistency:
Expand Down
87 changes: 71 additions & 16 deletions rdtools/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def pvlib_clearsky_filter(
**kwargs,
):
"""
Filtering based on the Reno and Hansen method for clearsky filtering
Filtering based on the Reno and Hansen method for clear-sky filtering
as implemented in pvlib. Requires a regular time series with uniform
time steps.
Expand Down Expand Up @@ -794,9 +794,8 @@ def _calculate_xgboost_model_features(df, sampling_frequency):

def xgboost_clip_filter(power_ac, mounting_type="fixed"):
"""
This function generates the features to run through the XGBoost
clipping model, runs the data through the model, and generates
model outputs.
This filter uses an XGBoost model to filter out
clipping periods in AC power or energy time series.
Parameters
----------
Expand Down Expand Up @@ -948,7 +947,7 @@ def two_way_window_filter(
series, roll_period=pd.to_timedelta("7 Days"), outlier_threshold=0.03
):
"""
Removes outliers based on forward and backward window of the rolling median. Points beyond
Removes anomalies based on forward and backward window of the rolling median. Points beyond
outlier_threshold from both the forward and backward-looking median are excluded by the filter.
Parameters
Expand All @@ -959,6 +958,11 @@ def two_way_window_filter(
The window to use for backward and forward
rolling medians for detecting outliers.
outlier_threshold : default is 0.03 meaning 3%
Returns
-------
pandas.Series
Boolean Series excluding anomalies
"""

series = series / series.quantile(0.99)
Expand All @@ -984,23 +988,35 @@ def two_way_window_filter(

def insolation_filter(insolation, quantile=0.1):
"""
TODO: figure out if this should be more general
A simple quantile filter. Primary application in RdTools is to exclude
low insolation points after the aggregation step.
returns a filter that excludes everything below quantile from insolation
Parameters
----------
insolation: pandas.Series
Pandas time series to be filtered. Usually insolation.
quantile : float, default 0.1
the minimum quantile above which data is kept.
Returns
-------
pandas.Series
Boolean Series excluding points below the quantile threshold
"""

limit = insolation.quantile(quantile)
mask = insolation >= limit
return mask


def hampel_filter(vals, k="14d", t0=3):
def hampel_filter(series, k="14d", t0=3):
"""
Hampel outlier filter primarily applied on daily normalized data but broadly
Hampel outlier filter primarily applied after aggregation step, but broadly
applicable.
Parameters
----------
vals : pandas.Series
series : pandas.Series
daily normalized time series
k : int or time offset string e.g. 'd', default 14d
size of window including the sample; 14d is equal to 7 days on either
Expand All @@ -1010,13 +1026,13 @@ def hampel_filter(vals, k="14d", t0=3):
Returns
-------
pandas.Series
Boolean Series of whether the given measurement is within 3 sigma of the
median. False points indicate outliers to be removed.
Boolean Series of whether the given measurement is within t0 sigma of the
rolling median. False points indicate outliers to be excluded.
"""
# Hampel Filter
L = 1.4826
rolling_median = vals.rolling(k, center=True, min_periods=1).median()
difference = np.abs(rolling_median - vals)
rolling_median = series.rolling(k, center=True, min_periods=1).median()
difference = np.abs(rolling_median - series)
median_abs_deviation = difference.rolling(k, center=True, min_periods=1).median()
threshold = t0 * L * median_abs_deviation
return difference <= threshold
Expand All @@ -1034,9 +1050,28 @@ def _tukey_fence(series, k=1.5):

def directional_tukey_filter(series, roll_period=pd.to_timedelta("7 Days"), k=1.5):
"""
Performs a forward and backward looking rolling tukey filter. Points must only
pass one of either the forward or backward looking filters to be kept
Performs a forward and backward looking rolling Tukey filter. Points more than k*IQR
above the third quartile or below the first quartile are classified as outliers. Points
must only pass one of either the forward or backward looking filters to be kept.
Parameters
----------
series: pandas.Series
Pandas time series to be filtered.
roll_period : int or timedelta, default 7 days
The window to use for backward and forward
rolling medians for detecting outliers.
k : float
The Tukey parameter. Points more than k*IQR above the third quartile
or below the first quartile are classified as outliers.
Returns
-------
pandas.Series
Boolean Series excluding anomalies
"""

backward_median = series.rolling(roll_period, min_periods=5, closed="both").median()
forward_median = (
series.loc[::-1].rolling(roll_period, min_periods=5, closed="both").median()
Expand All @@ -1056,6 +1091,26 @@ def directional_tukey_filter(series, roll_period=pd.to_timedelta("7 Days"), k=1.
def hour_angle_filter(series, lat, lon, min_hour_angle=-30, max_hour_angle=30):
"""
Creates a filter based on the hour angle of the sun (15 degrees per hour)
Parameters
----------
series: pandas.Series
Pandas time series to be filtered
lat: float
location latitude
lon: float
location longitude
min_hour_angle: float
minimum hour angle to include
max_hour_angle: float
maximum hour angle to include
Returns
-------
pandas.Series
Boolean Series excluding points outside the specified hour
angle range
"""

times = series.index
Expand Down
133 changes: 0 additions & 133 deletions rdtools/normalization.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
'''Functions for normalizing, rescaling, and regularizing PV system data.'''

import pandas as pd
import pvlib
import numpy as np
from scipy.optimize import minimize
import warnings
Expand Down Expand Up @@ -176,138 +175,6 @@ def normalize_with_pvwatts(energy, pvwatts_kws):
return energy_normalized, insolation


@deprecated(since='2.0.0', removal='3.0.0',
alternative='normalize_with_expected_power')
def sapm_dc_power(pvlib_pvsystem, met_data):
'''
Use Sandia Array Performance Model (SAPM) and PVWatts to compute the
effective DC power using measured irradiance, ambient temperature, and wind
speed. Effective irradiance and cell temperature are calculated with SAPM,
and DC power with PVWatts.
.. warning::
The ``pvlib_pvsystem`` argument must be a ``pvlib.pvsystem.LocalizedPVSystem``
object, which is no longer available as of pvlib 0.9.0. To use this function
you'll need to use an older version of pvlib.
Parameters
----------
pvlib_pvsystem : pvlib.pvsystem.LocalizedPVSystem
Object contains orientation, geographic coordinates, equipment
constants (including DC rated power in watts). The object must also
specify either the ``temperature_model_parameters`` attribute or both
``racking_model`` and ``module_type`` attributes to infer the temperature model parameters.
met_data : pandas.DataFrame
Measured irradiance components, ambient temperature, and wind speed.
Expected met_data DataFrame column names:
['DNI', 'GHI', 'DHI', 'Temperature', 'Wind Speed']
Note
----
All series are assumed to be right-labeled, meaning that the recorded
value at a given timestamp refers to the previous time interval
Returns
-------
power_dc : pandas.Series
DC power in watts derived using Sandia Array Performance Model and
PVWatts.
effective_poa : pandas.Series
Effective irradiance calculated with SAPM
'''

solar_position = pvlib_pvsystem.get_solarposition(met_data.index)

total_irradiance = pvlib_pvsystem\
.get_irradiance(solar_position['zenith'],
solar_position['azimuth'],
met_data['DNI'],
met_data['GHI'],
met_data['DHI'])

aoi = pvlib_pvsystem.get_aoi(solar_position['zenith'],
solar_position['azimuth'])

airmass = pvlib_pvsystem\
.get_airmass(solar_position=solar_position, model='kastenyoung1989')
airmass_absolute = airmass['airmass_absolute']

effective_irradiance = pvlib.pvsystem\
.sapm_effective_irradiance(poa_direct=total_irradiance['poa_direct'],
poa_diffuse=total_irradiance['poa_diffuse'],
airmass_absolute=airmass_absolute,
aoi=aoi,
module=pvlib_pvsystem.module)

temp_cell = pvlib_pvsystem\
.sapm_celltemp(total_irradiance['poa_global'],
met_data['Temperature'],
met_data['Wind Speed'])

power_dc = pvlib_pvsystem\
.pvwatts_dc(g_poa_effective=effective_irradiance,
temp_cell=temp_cell)

return power_dc, effective_irradiance


@deprecated(since='2.0.0', removal='3.0.0',
alternative='normalize_with_expected_power')
def normalize_with_sapm(energy, sapm_kws):
'''
Normalize system AC energy output given measured met_data and
meteorological data. This method relies on the Sandia Array Performance
Model (SAPM) to compute the effective DC energy using measured irradiance,
ambient temperature, and wind speed.
Energy timeseries and met_data timeseries can be different granularities.
.. warning::
The ``pvlib_pvsystem`` argument must be a ``pvlib.pvsystem.LocalizedPVSystem``
object, which is no longer available as of pvlib 0.9.0. To use this function
you'll need to use an older version of pvlib.
Parameters
----------
energy : pandas.Series
Energy time series to be normalized in watt hours.
Must be a right-labeled regular time series.
sapm_kws : dict
Dictionary of parameters required for sapm_dc_power function. See
Other Parameters.
Other Parameters
---------------
pvlib_pvsystem : pvlib.pvsystem.LocalizedPVSystem object
Object contains orientation, geographic coordinates, equipment
constants (including DC rated power in watts). The object must also
specify either the ``temperature_model_parameters`` attribute or both
``racking_model`` and ``module_type`` to infer the model parameters.
met_data : pandas.DataFrame
Measured met_data, ambient temperature, and wind speed. Expected
column names are ['DNI', 'GHI', 'DHI', 'Temperature', 'Wind Speed']
Note
----
All series are assumed to be right-labeled, meaning that the recorded
value at a given timestamp refers to the previous time interval
Returns
-------
energy_normalized : pandas.Series
Energy divided by Sandia Model DC energy.
insolation : pandas.Series
Insolation associated with each normalized point
'''

power_dc, irrad = sapm_dc_power(**sapm_kws)

energy_normalized, insolation = normalize_with_expected_power(energy, power_dc, irrad,
pv_input='energy')

return energy_normalized, insolation


def _delta_index(series):
'''
Takes a pandas series with a DatetimeIndex as input and
Expand Down
Loading

0 comments on commit bdef714

Please sign in to comment.