Skip to content

Commit

Permalink
Update docstrings for new functions (#418)
Browse files Browse the repository at this point in the history
* filtering module doc strings

* Add new filter functions to __init__.py

* add filtering functions to api.rst

* Explain sensor_clearsky_filter

* this is why we have tests

* style fix
  • Loading branch information
mdeceglie authored Jul 2, 2024
1 parent 8d67b24 commit d1eb662
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 17 deletions.
5 changes: 5 additions & 0 deletions docs/sphinx/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ Filtering
poa_filter
tcell_filter
normalized_filter
two_way_window_filter
insolation_filter
hampel_filter
directional_tukey_filter
hour_angle_filter


Normalization
Expand Down
3 changes: 2 additions & 1 deletion rdtools/analysis_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ class TrendAnalysis:
filter_params_aggregated: dict
parameters to be passed to rdtools.filtering functions that specifically handle
aggregated data (daily filters, etc.). Keys are the names of the rdtools.filtering functions.
Values are dicts of parameters to be passed to those functions. Also has a special key
Values are dicts of parameters to be passed to those functions. To invoke `clearsky_filter`
for a sensor analysis, use the special key `sensor_clearsky_filter`. Also has a special key
`ad_hoc_filter`; this filter is a boolean mask joined with the rest of the filters.
filter_params_aggregated defaults to empty dicts for each function in rdtools.filtering,
in which case those functions use default parameter values, `ad_hoc_filter`
Expand Down
87 changes: 71 additions & 16 deletions rdtools/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def pvlib_clearsky_filter(
**kwargs,
):
"""
Filtering based on the Reno and Hansen method for clearsky filtering
Filtering based on the Reno and Hansen method for clear-sky filtering
as implemented in pvlib. Requires a regular time series with uniform
time steps.
Expand Down Expand Up @@ -794,9 +794,8 @@ def _calculate_xgboost_model_features(df, sampling_frequency):

def xgboost_clip_filter(power_ac, mounting_type="fixed"):
"""
This function generates the features to run through the XGBoost
clipping model, runs the data through the model, and generates
model outputs.
This filter uses an XGBoost model to filter out
clipping periods in AC power or energy time series.
Parameters
----------
Expand Down Expand Up @@ -948,7 +947,7 @@ def two_way_window_filter(
series, roll_period=pd.to_timedelta("7 Days"), outlier_threshold=0.03
):
"""
Removes outliers based on forward and backward window of the rolling median. Points beyond
Removes anomalies based on forward and backward window of the rolling median. Points beyond
outlier_threshold from both the forward and backward-looking median are excluded by the filter.
Parameters
Expand All @@ -959,6 +958,11 @@ def two_way_window_filter(
The window to use for backward and forward
rolling medians for detecting outliers.
outlier_threshold : default is 0.03 meaning 3%
Returns
-------
pandas.Series
Boolean Series excluding anomalies
"""

series = series / series.quantile(0.99)
Expand All @@ -984,23 +988,35 @@ def two_way_window_filter(

def insolation_filter(insolation, quantile=0.1):
"""
TODO: figure out if this should be more general
A simple quantile filter. Primary application in RdTools is to exclude
low insolation points after the aggregation step.
returns a filter that excludes everything below quantile from insolation
Parameters
----------
insolation: pandas.Series
Pandas time series to be filtered. Usually insolation.
quantile : float, default 0.1
the minimum quantile above which data is kept.
Returns
-------
pandas.Series
Boolean Series excluding points below the quantile threshold
"""

limit = insolation.quantile(quantile)
mask = insolation >= limit
return mask


def hampel_filter(vals, k="14d", t0=3):
def hampel_filter(series, k="14d", t0=3):
"""
Hampel outlier filter primarily applied on daily normalized data but broadly
Hampel outlier filter primarily applied after the aggregation step, but broadly
applicable.
Parameters
----------
vals : pandas.Series
series : pandas.Series
daily normalized time series
k : int or time offset string e.g. 'd', default 14d
size of window including the sample; 14d is equal to 7 days on either
Expand All @@ -1010,13 +1026,13 @@ def hampel_filter(vals, k="14d", t0=3):
Returns
-------
pandas.Series
Boolean Series of whether the given measurement is within 3 sigma of the
median. False points indicate outliers to be removed.
Boolean Series of whether the given measurement is within t0 sigma of the
rolling median. False points indicate outliers to be excluded.
"""
# Hampel Filter
L = 1.4826
rolling_median = vals.rolling(k, center=True, min_periods=1).median()
difference = np.abs(rolling_median - vals)
rolling_median = series.rolling(k, center=True, min_periods=1).median()
difference = np.abs(rolling_median - series)
median_abs_deviation = difference.rolling(k, center=True, min_periods=1).median()
threshold = t0 * L * median_abs_deviation
return difference <= threshold
Expand All @@ -1034,9 +1050,28 @@ def _tukey_fence(series, k=1.5):

def directional_tukey_filter(series, roll_period=pd.to_timedelta("7 Days"), k=1.5):
"""
Performs a forward and backward looking rolling tukey filter. Points must only
pass one of either the forward or backward looking filters to be kept
Performs a forward and backward looking rolling Tukey filter. Points more than k*IQR
above the third quartile or below the first quartile are classified as outliers. Points
must only pass one of either the forward or backward looking filters to be kept.
Parameters
----------
series: pandas.Series
Pandas time series to be filtered.
roll_period : int or timedelta, default 7 days
The window to use for backward and forward
rolling medians for detecting outliers.
k : float
The Tukey parameter. Points more than k*IQR above the third quartile
or below the first quartile are classified as outliers.
Returns
-------
pandas.Series
Boolean Series excluding anomalies
"""

backward_median = series.rolling(roll_period, min_periods=5, closed="both").median()
forward_median = (
series.loc[::-1].rolling(roll_period, min_periods=5, closed="both").median()
Expand All @@ -1056,6 +1091,26 @@ def directional_tukey_filter(series, roll_period=pd.to_timedelta("7 Days"), k=1.
def hour_angle_filter(series, lat, lon, min_hour_angle=-30, max_hour_angle=30):
"""
Creates a filter based on the hour angle of the sun (15 degrees per hour)
Parameters
----------
series: pandas.Series
Pandas time series to be filtered
lat: float
location latitude
lon: float
location longitude
min_hour_angle: float
minimum hour angle to include
max_hour_angle: float
maximum hour angle to include
Returns
-------
pandas.Series
Boolean Series excluding points outside the specified hour
angle range
"""

times = series.index
Expand Down

0 comments on commit d1eb662

Please sign in to comment.