Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add random_state to HyperparameterOptimization class - Fixes for #125 #131

Merged
merged 14 commits into from
Jan 31, 2024
Merged
18 changes: 12 additions & 6 deletions luminaire/optimization/hyperparameter_optimization.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from hyperopt import fmin, tpe, hp, STATUS_OK
from luminaire.model import LADStructuralModel, LADStructuralHyperParams, LADFilteringModel, LADFilteringHyperParams
from luminaire.exploration.data_exploration import DataExploration
from luminaire.utils.random_state_validation import check_random_state
import warnings
warnings.filterwarnings('ignore')


class HyperparameterOptimization(object):
"""
Hyperparameter optimization for LAD outlier detection configuration for batch data.
Expand All @@ -20,6 +20,7 @@ class HyperparameterOptimization(object):
:param int min_ts_length: The minimum required length of the time series for training. The input time series will be
truncated if the length is greater than this value.
:param int scoring_length: Number of innovations to be scored after training window with respect to the frequency.
:param int random_state: Seed used for the random number generation during optimization, so that runs can be reproduced. Defaults to None, which leaves the run unseeded.

.. _Pandas offset: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
"""
Expand All @@ -31,6 +32,7 @@ def __init__(self,
max_ts_length=None,
min_ts_length=None,
scoring_length=None,
random_state=None,
**kwargs):
self._target_metric = 'raw'
self.freq = freq
Expand All @@ -48,6 +50,8 @@ def __init__(self,
self.scoring_length = scoring_length or (scoring_length_dict.get(freq)
if freq in scoring_length_dict.keys() else 30)

self.random_state = random_state

def _mape(self, actuals, predictions):
"""
This function computes the mean absolute percentage error for the observed vs the predicted values.
Expand Down Expand Up @@ -93,7 +97,8 @@ def _synthetic_anomaly_check(self, observation, prediction, std_error):

# Anomaly detection based on synthetic anomalies generated through a given intensity list
for prop in self.anomaly_intensity_list:
trial_prob = np.random.uniform(0, 1, 1)
rnd = check_random_state(self.random_state)
trial_prob = rnd.uniform(0, 1, 1)
if trial_prob < 0.4:
synthetic_value = observation + (prop * observation)
anomaly_flags.append(1)
Expand Down Expand Up @@ -227,7 +232,8 @@ def _objective_part(self, data, smoothed_series, args):
anomaly_probabilities_list = []
local_model = copy.deepcopy(stable_model)
for i, row in scoring_data.iterrows():
trial_prob = np.random.uniform(0, 1, 1)
rnd = check_random_state(self.random_state)
trial_prob = rnd.random.uniform(0, 1, 1)
observed_value = row.raw
synthetic_actual = observed_value
if trial_prob < 0.4:
Expand Down Expand Up @@ -263,7 +269,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50):
:return: Optimal hyperparameters
:rtype: dict
"""

import numpy as np
from functools import partial
from pykalman import KalmanFilter

Expand All @@ -288,7 +294,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50):

try:
series = data[self._target_metric].values
kf = KalmanFilter()
kf = KalmanFilter(random_state=self.random_state)
smoothed_series, cov_series = kf.em(series).smooth(series)
except:
raise ValueError('Kalman Smoothing requires more than one data point')
Expand All @@ -299,7 +305,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50):
raise ValueError('Only `detection_type=OutlierDetection` is supported in hyperparameter optimization right now')

# Calling the optimization function
hyper_param = fmin(objective, space=space, algo=algo, max_evals=max_evals, show_progressbar=True)
hyper_param = fmin(objective, space=space, algo=algo, max_evals=max_evals, show_progressbar=True, rstate=np.random.default_rng(self.random_state))
hyper_param['LuminaireModel'] = hyper_param_list[hyper_param['LuminaireModel']]['model']
if 'max_ft_freq' in hyper_param:
hyper_param['max_ft_freq'] = hyper_param['max_ft_freq'] + 2
Expand Down
11 changes: 9 additions & 2 deletions luminaire/tests/test_hyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@

class TestHyperparameterOptimization(object):

def test_run(self, test_data_with_missing):

def test_run1(self, test_data_with_missing):
"""Test using the default random_state=None"""
hyper_obj = HyperparameterOptimization(freq='D', detection_type='OutlierDetection')
hyper_parameters = hyper_obj.run(test_data_with_missing, max_evals=5)

assert isinstance(hyper_parameters, dict)

def test_run2(self, test_data_with_missing):
"""Test defining a random_state"""
hyper_obj = HyperparameterOptimization(freq='D', detection_type='OutlierDetection', random_state=42)
hyper_parameters = hyper_obj.run(test_data_with_missing, max_evals=5)

assert isinstance(hyper_parameters, dict)
1 change: 1 addition & 0 deletions luminaire/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .random_state_validation import check_random_state
23 changes: 23 additions & 0 deletions luminaire/utils/random_state_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np
import numbers

def check_random_state(seed):
    """Turn seed into a np.random.RandomState instance.

    :param seed: Seed for the random state. Accepted values are ``None``, the
        ``np.random`` module itself, an integer, or an existing
        ``np.random.RandomState`` instance.
    :return: The random state selected by ``seed``:
        If seed is None (or the ``np.random`` module), the RandomState
        singleton used by np.random.
        If seed is an int, a new RandomState instance seeded with seed. A new
        instance is built on every call, so repeated calls with the same int
        each start the same sequence from the beginning.
        If seed is already a RandomState instance, that same instance.
    :rtype: np.random.RandomState
    :raises ValueError: If seed is none of the accepted kinds of value.
    """
    if seed is None or seed is np.random:
        # Private numpy attribute holding the module-level RandomState
        # that backs the np.random.* convenience functions.
        return np.random.mtrand._rand
    if isinstance(seed, numbers.Integral):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    # Wrap seed in a 1-tuple: with a bare ``% seed`` a tuple seed would be
    # unpacked as the format arguments and raise TypeError (or be misreported)
    # instead of producing the intended ValueError.
    raise ValueError(
        "%r cannot be used to seed a numpy.random.RandomState instance" % (seed,)
    )
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ bayescd>=0.4
changepy>=0.3.1
hyperopt>=0.1.2
numpy>=1.17.5, <=1.22.4
pandas>=0.25.3
pandas>=0.25.3, <=2.0.3
pykalman>=0.9.5
scipy>=1.6.0
statsmodels>=0.13.0
Copy link
Collaborator Author

@sayanchk sayanchk Jan 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to add upper bounds due to this serialization issue with newer pandas and statsmodels versions. Will open separate issues to look into these.

statsmodels>=0.13.0, <=0.13.5
scikit-learn>=0.24.2
decorator>=5.1.0
Loading