Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add random_state to HyperparameterOptimization class - Fixes for #125 #131

Merged
merged 14 commits into from
Jan 31, 2024
Merged
Prev Previous commit
Next Next commit
add a random_state argument
to hyperparameter optimization module
  • Loading branch information
papaemman committed Jan 23, 2023
commit 8dbc83fa71082c2298381281365e00545e7fc82b
12 changes: 9 additions & 3 deletions luminaire/optimization/hyperparameter_optimization.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
import warnings
warnings.filterwarnings('ignore')

from ..utils import check_random_state

class HyperparameterOptimization(object):
"""
@@ -31,6 +32,7 @@ def __init__(self,
max_ts_length=None,
min_ts_length=None,
scoring_length=None,
random_state=None,
**kwargs):
self._target_metric = 'raw'
self.freq = freq
@@ -48,6 +50,8 @@ def __init__(self,
self.scoring_length = scoring_length or (scoring_length_dict.get(freq)
if freq in scoring_length_dict.keys() else 30)

self.random_state = random_state

def _mape(self, actuals, predictions):
"""
This function computes the mean absolute percentage error for the observed vs the predicted values.
@@ -93,7 +97,8 @@ def _synthetic_anomaly_check(self, observation, prediction, std_error):

# Anomaly detection based on synthetic anomalies generated through a given intensity list
for prop in self.anomaly_intensity_list:
trial_prob = np.random.uniform(0, 1, 1)
rnd = check_random_state(self.random_state)
trial_prob = rnd.uniform(0, 1, 1)
if trial_prob < 0.4:
synthetic_value = observation + (prop * observation)
anomaly_flags.append(1)
@@ -227,7 +232,8 @@ def _objective_part(self, data, smoothed_series, args):
anomaly_probabilities_list = []
local_model = copy.deepcopy(stable_model)
for i, row in scoring_data.iterrows():
trial_prob = np.random.uniform(0, 1, 1)
rnd = check_random_state(self.random_state)
trial_prob = rnd.uniform(0, 1, 1)
observed_value = row.raw
synthetic_actual = observed_value
if trial_prob < 0.4:
@@ -288,7 +294,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50):

try:
series = data[self._target_metric].values
kf = KalmanFilter()
kf = KalmanFilter(random_state=self.random_state)
smoothed_series, cov_series = kf.em(series).smooth(series)
except:
raise ValueError('Kalman Smoothing requires more than one data point')
1 change: 1 addition & 0 deletions luminaire/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .validation import check_random_state
22 changes: 22 additions & 0 deletions luminaire/utils/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import numpy as np
import numbers

def check_random_state(seed):
    """Coerce *seed* into a ``numpy.random.RandomState`` instance.

    Parameters
    ----------
    seed : None, int, or numpy.random.RandomState
        - ``None`` (or the ``numpy.random`` module itself): return the
          global RandomState singleton used by ``np.random``.
        - int: return a fresh ``RandomState`` seeded with that value.
        - ``RandomState`` instance: returned unchanged.

    Returns
    -------
    numpy.random.RandomState
        A random-number generator object.

    Raises
    ------
    ValueError
        If *seed* is none of the accepted types.
    """
    # Already a generator: hand it straight back.
    if isinstance(seed, np.random.RandomState):
        return seed
    # Integral seeds (including numpy integer types) get a fresh generator.
    if isinstance(seed, numbers.Integral):
        return np.random.RandomState(seed)
    # No seed supplied: fall back to numpy's global singleton generator.
    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    raise ValueError(
        "%r cannot be used to seed a numpy.random.RandomState instance" % seed
    )