From 169c4375ae7e4cab791f621c72a4054825695794 Mon Sep 17 00:00:00 2001
From: Robert Arbon <robert.arbon@gmail.com>
Date: Sun, 11 Jun 2017 10:08:50 +0100
Subject: [PATCH 1/6] Added acquisition functions

They appear to work but need better tests. This is a commit from a previous branch.
---
 osprey/strategies.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/osprey/strategies.py b/osprey/strategies.py
index e5c792e..1d0feab 100644
--- a/osprey/strategies.py
+++ b/osprey/strategies.py
@@ -19,6 +19,10 @@
     from GPy.util.linalg import tdot
     from GPy.models import GPRegression
     from scipy.optimize import minimize
+    from scipy.stats import norm
+    # If the GPy modules fail we won't do this unnecessarily.
+    from .entry_point import load_entry_point
+    KERNEL_BASE_CLASS = kern.src.kern.Kern
 except:
     # GPy is optional, but required for gp
     GPRegression = kern = minimize = None
@@ -226,18 +230,33 @@ def _get_random_point(self):
         return np.array([np.random.uniform(low=0., high=1.)
                          for i in range(self.n_dims)])
 
+    def _expected_improvement(self, x):
+        y_mean, y_var = self.model.predict(x.copy().reshape(-1, self.n_dims))
+        y_std = np.sqrt(y_var)
+        y_best = self.model.Y.max(axis=0)
+        z = (y_mean - y_best)/y_std
+        result = y_std*(z*norm.cdf(z) + norm.pdf(z))
+        return result
+
+    def _upper_conf_bound(self, x, kappa=1.0):
+        y_mean, y_var = self.model.predict(x.copy().reshape(-1, self.n_dims))
+        y_std = np.sqrt(y_var)
+        result = y_mean + kappa*y_std
+        return result
+
     def _optimize(self, init=None):
+        # TODO start minimization from a range of points and take minimum
         if not init:
             init = self._get_random_point()
 
         def z(x):
-            y = x.copy().reshape(-1, self.n_dims)
-            s, v = self.model.predict(y, kern=(np.sum(self._kerns).copy() +
-                                               self._kernb.copy()))
-            return -(s+v).flatten()
-
-        return minimize(z, init, bounds=self.n_dims*[(0., 1.)],
-                        options={'maxiter': self.max_iter, 'disp': 0}).x
+            # TODO make spread of points around x and take mean value.
+            # TODO Could use options dict to specify what type of kernel to create when
+            af = self._expected_improvement(x)
+            return (-1)*af
+        res = minimize(z, init, bounds=self.n_dims*[(0., 1.)],
+                        options={'maxiter': self.max_iter, 'disp': 0})
+        return res.x
 
     def _get_data(self, history, searchspace):
         X = []
@@ -296,7 +315,9 @@ def suggest(self, history, searchspace, max_tries=5):
         self.n_dims = searchspace.n_dims
 
         X, Y, V, ignore = self._get_data(history, searchspace)
-        self._create_kernel(V)
+
+        # TODO make _create_kernel accept optional args.
+        self._create_kernel()
         self._fit_model(X, Y)
 
         suggestion = self._optimize()

From 0bbca292780d9df2d38de205738d15e0d6c2e4bc Mon Sep 17 00:00:00 2001
From: Robert Arbon <robert.arbon@gmail.com>
Date: Sun, 11 Jun 2017 11:31:18 +0100
Subject: [PATCH 2/6] Added variable for determining acquisition function

Not tested yet.
---
 osprey/strategies.py | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/osprey/strategies.py b/osprey/strategies.py
index 1d0feab..248b5a8 100644
--- a/osprey/strategies.py
+++ b/osprey/strategies.py
@@ -202,7 +202,7 @@ def _hyperopt_fmin_random_kwarg(random):
 class GP(BaseStrategy):
     short_name = 'gp'
 
-    def __init__(self, seed=None, seeds=1, max_feval=5E4, max_iter=1E5):
+    def __init__(self, acquisition={'name':'ei', 'params': {}}, seed=None, seeds=1, max_feval=5E4, max_iter=1E5):
         self.seed = seed
         self.seeds = seeds
         self.max_feval = max_feval
@@ -213,6 +213,8 @@ def __init__(self, seed=None, seeds=1, max_feval=5E4, max_iter=1E5):
         self._kerns = None
         self._kernf = None
         self._kernb = None
+        self.acquisition_function = acquisition
+        self._acquisition_function = None
 
     def _create_kernel(self, V):
         self._kerns = [RBF(1, ARD=True, active_dims=[i])
@@ -230,7 +232,7 @@ def _get_random_point(self):
         return np.array([np.random.uniform(low=0., high=1.)
                          for i in range(self.n_dims)])
 
-    def _expected_improvement(self, x):
+    def _ei(self, x):
         y_mean, y_var = self.model.predict(x.copy().reshape(-1, self.n_dims))
         y_std = np.sqrt(y_var)
         y_best = self.model.Y.max(axis=0)
@@ -238,12 +240,16 @@ def _expected_improvement(self, x):
         result = y_std*(z*norm.cdf(z) + norm.pdf(z))
         return result
 
-    def _upper_conf_bound(self, x, kappa=1.0):
+    def _ucb(self, x, kappa=1.0):
         y_mean, y_var = self.model.predict(x.copy().reshape(-1, self.n_dims))
         y_std = np.sqrt(y_var)
         result = y_mean + kappa*y_std
         return result
 
+    def _osprey(self, x):
+        y_mean, y_var = self.model.predict(x.copy().reshape(-1, self.n_dims))
+        return (y_mean+y_var).flatten()
+
     def _optimize(self, init=None):
         # TODO start minimization from a range of points and take minimum
         if not init:
@@ -251,13 +257,30 @@ def _optimize(self, init=None):
 
         def z(x):
             # TODO make spread of points around x and take mean value.
-            # TODO Could use options dict to specify what type of kernel to create when
-            af = self._expected_improvement(x)
+            af = self._acquisition_function(x)
             return (-1)*af
         res = minimize(z, init, bounds=self.n_dims*[(0., 1.)],
                         options={'maxiter': self.max_iter, 'disp': 0})
         return res.x
 
+    def _set_acquisition(self):
+        # TODO move variable checking to a place consistent with other params
+        if len(self.acquisition_function) > 1:
+            raise RuntimeError('Must specify only one acquisition function')
+        if sorted(self.acquisition_function.keys()) != ['name', 'params']:
+            raise RuntimeError('strategy/params/acquisition must contain keys'
+                               '"name" and "params"')
+        if self.acquisition_function['name'] not in ['ei', 'ucb', 'osprey']:
+            raise RuntimeError('strategy/params/acquisition name must be one of '
+                               '"ei", "ucb", "osprey"')
+
+        f = eval('_'+self.acquisition_function['name'])
+
+        # This seems slightly convoluted.
+        def g(x):
+            return f(x, **self.acquisition_function['params'])
+        self._acquisition_function = g
+
     def _get_data(self, history, searchspace):
         X = []
         Y = []
@@ -318,6 +341,7 @@ def suggest(self, history, searchspace, max_tries=5):
 
         # TODO make _create_kernel accept optional args.
         self._create_kernel()
+        self._set_acquisition()
         self._fit_model(X, Y)
 
         suggestion = self._optimize()

From 095fe40ecb2e07ae7b6bb00600028657b29f75c3 Mon Sep 17 00:00:00 2001
From: Robert Arbon <robert.arbon@gmail.com>
Date: Sun, 11 Jun 2017 14:47:50 +0100
Subject: [PATCH 3/6] New AFs don't cause crashes but variance is negative

Think this may be a problem with the kernel
---
 osprey/strategies.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/osprey/strategies.py b/osprey/strategies.py
index 248b5a8..55f5e5d 100644
--- a/osprey/strategies.py
+++ b/osprey/strategies.py
@@ -215,6 +215,7 @@ def __init__(self, acquisition={'name':'ei', 'params': {}}, seed=None, seeds=1,
         self._kernb = None
         self.acquisition_function = acquisition
         self._acquisition_function = None
+        self._set_acquisition()
 
     def _create_kernel(self, V):
         self._kerns = [RBF(1, ARD=True, active_dims=[i])
@@ -233,7 +234,9 @@ def _get_random_point(self):
                          for i in range(self.n_dims)])
 
     def _ei(self, x):
-        y_mean, y_var = self.model.predict(x.copy().reshape(-1, self.n_dims))
+        y_mean, y_var = self.model.predict(x, kern=(np.sum(self._kerns).copy() +
+                                                    self._kernb.copy()))
+        print(y_mean, y_var)
         y_std = np.sqrt(y_var)
         y_best = self.model.Y.max(axis=0)
         z = (y_mean - y_best)/y_std
@@ -241,13 +244,16 @@ def _ei(self, x):
         return result
 
     def _ucb(self, x, kappa=1.0):
-        y_mean, y_var = self.model.predict(x.copy().reshape(-1, self.n_dims))
+        y_mean, y_var = self.model.predict(x, kern=(np.sum(self._kerns).copy() +
+                                                    self._kernb.copy()))
         y_std = np.sqrt(y_var)
         result = y_mean + kappa*y_std
         return result
 
     def _osprey(self, x):
-        y_mean, y_var = self.model.predict(x.copy().reshape(-1, self.n_dims))
+        y_mean, y_var = self.model.predict(x, kern=(np.sum(self._kerns).copy() +
+                                                    self._kernb.copy()))
+        print(y_mean, y_var)
         return (y_mean+y_var).flatten()
 
     def _optimize(self, init=None):
@@ -257,6 +263,7 @@ def _optimize(self, init=None):
 
         def z(x):
             # TODO make spread of points around x and take mean value.
+            x = x.copy().reshape(-1, self.n_dims)
             af = self._acquisition_function(x)
             return (-1)*af
         res = minimize(z, init, bounds=self.n_dims*[(0., 1.)],
@@ -264,8 +271,7 @@ def z(x):
         return res.x
 
     def _set_acquisition(self):
-        # TODO move variable checking to a place consistent with other params
-        if len(self.acquisition_function) > 1:
+        if isinstance(self.acquisition_function, list):
             raise RuntimeError('Must specify only one acquisition function')
         if sorted(self.acquisition_function.keys()) != ['name', 'params']:
             raise RuntimeError('strategy/params/acquisition must contain keys'
@@ -274,11 +280,11 @@ def _set_acquisition(self):
             raise RuntimeError('strategy/params/acquisition name must be one of '
                                '"ei", "ucb", "osprey"')
 
-        f = eval('_'+self.acquisition_function['name'])
+        f = eval('self._'+self.acquisition_function['name'])
 
-        # This seems slightly convoluted.
         def g(x):
             return f(x, **self.acquisition_function['params'])
+
         self._acquisition_function = g
 
     def _get_data(self, history, searchspace):
@@ -340,10 +346,8 @@ def suggest(self, history, searchspace, max_tries=5):
         X, Y, V, ignore = self._get_data(history, searchspace)
 
         # TODO make _create_kernel accept optional args.
-        self._create_kernel()
-        self._set_acquisition()
+        self._create_kernel(V)
         self._fit_model(X, Y)
-
         suggestion = self._optimize()
 
         if suggestion in ignore or self._is_within(suggestion, X):

From 468d76e6c88f28da1992dedfd1f3e89051b965b5 Mon Sep 17 00:00:00 2001
From: Robert Arbon <robert.arbon@gmail.com>
Date: Mon, 12 Jun 2017 17:21:54 +0100
Subject: [PATCH 4/6] Raises error for negative variance. All vars made +ve for
 testing

---
 osprey/strategies.py | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/osprey/strategies.py b/osprey/strategies.py
index 55f5e5d..4ca82d2 100644
--- a/osprey/strategies.py
+++ b/osprey/strategies.py
@@ -233,27 +233,26 @@ def _get_random_point(self):
         return np.array([np.random.uniform(low=0., high=1.)
                          for i in range(self.n_dims)])
 
-    def _ei(self, x):
-        y_mean, y_var = self.model.predict(x, kern=(np.sum(self._kerns).copy() +
-                                                    self._kernb.copy()))
-        print(y_mean, y_var)
+    def _is_var_positive(self, var):
+
+        if np.any(var < 0):
+            # RuntimeError may be overkill
+            raise RuntimeError('Negative variance predicted from regression model.')
+        else:
+            return True
+
+    def _ei(self, x, y_mean, y_var):
         y_std = np.sqrt(y_var)
         y_best = self.model.Y.max(axis=0)
         z = (y_mean - y_best)/y_std
         result = y_std*(z*norm.cdf(z) + norm.pdf(z))
         return result
 
-    def _ucb(self, x, kappa=1.0):
-        y_mean, y_var = self.model.predict(x, kern=(np.sum(self._kerns).copy() +
-                                                    self._kernb.copy()))
-        y_std = np.sqrt(y_var)
-        result = y_mean + kappa*y_std
+    def _ucb(self, x, y_mean, y_var, kappa=1.0):
+        result = y_mean + kappa*np.sqrt(y_var)
         return result
 
-    def _osprey(self, x):
-        y_mean, y_var = self.model.predict(x, kern=(np.sum(self._kerns).copy() +
-                                                    self._kernb.copy()))
-        print(y_mean, y_var)
+    def _osprey(self, x, y_mean, y_var):
         return (y_mean+y_var).flatten()
 
     def _optimize(self, init=None):
@@ -264,8 +263,16 @@ def _optimize(self, init=None):
         def z(x):
             # TODO make spread of points around x and take mean value.
             x = x.copy().reshape(-1, self.n_dims)
-            af = self._acquisition_function(x)
+            y_mean, y_var = self.model.predict(x, kern=(np.sum(self._kerns).copy() +
+                                                        self._kernb.copy()))
+            # for testing as this kernel seems to always give negative variance.
+            # TODO remove this.
+            y_var = np.abs(y_var)
+
+            if self._is_var_positive(y_var):
+                af = self._acquisition_function(x, y_mean=y_mean, y_var=y_var)
             return (-1)*af
+
         res = minimize(z, init, bounds=self.n_dims*[(0., 1.)],
                         options={'maxiter': self.max_iter, 'disp': 0})
         return res.x

From be3f0b9f0227034ee8b47d3c56852a725da99e4e Mon Sep 17 00:00:00 2001
From: Robert Arbon <robert.arbon@gmail.com>
Date: Tue, 13 Jun 2017 09:49:17 +0100
Subject: [PATCH 5/6] New Acquisition functions working but not compatible with
 GP Kernel

The GP kernel gives negative variance. As both the upper confidence bound and expected improvement need the standard deviation, they don't work.
---
 docs/changelog.rst                       |  3 ++-
 osprey/data/sklearn_skeleton_config.yaml |  1 +
 osprey/strategies.py                     | 20 ++++++++++++--------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index fd06e65..2ffaf63 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -19,7 +19,8 @@ New Features
 + Added ``max_param_suggestion_retries`` entry to the config file. This limits the number of times that
   ``strategy.suggest`` is called when attempting to produce a trial with a set of params not previously
   tested in the history. 
-
++ Added the ability to specify three different acquisition functions for the Gaussian processes strategy: expected
+  improvement (``ei``), upper confidence bound (``ucb``) and the original Osprey function (``osprey``, the default).
 
 Bug Fixes
 ~~~~~~~~~
diff --git a/osprey/data/sklearn_skeleton_config.yaml b/osprey/data/sklearn_skeleton_config.yaml
index 09fbed8..f80abf5 100644
--- a/osprey/data/sklearn_skeleton_config.yaml
+++ b/osprey/data/sklearn_skeleton_config.yaml
@@ -5,6 +5,7 @@ strategy:
   name: gp
   params:
     seeds: 5
+    acquisition: { name : ei, params : {}}
 
 search_space:
   C:
diff --git a/osprey/strategies.py b/osprey/strategies.py
index 4ca82d2..2ef50bb 100644
--- a/osprey/strategies.py
+++ b/osprey/strategies.py
@@ -202,7 +202,7 @@ def _hyperopt_fmin_random_kwarg(random):
 class GP(BaseStrategy):
     short_name = 'gp'
 
-    def __init__(self, acquisition={'name':'ei', 'params': {}}, seed=None, seeds=1, max_feval=5E4, max_iter=1E5):
+    def __init__(self, acquisition={'name':'osprey', 'params': {}}, seed=None, seeds=1, max_feval=5E4, max_iter=1E5):
         self.seed = seed
         self.seeds = seeds
         self.max_feval = max_feval
@@ -265,12 +265,16 @@ def z(x):
             x = x.copy().reshape(-1, self.n_dims)
             y_mean, y_var = self.model.predict(x, kern=(np.sum(self._kerns).copy() +
                                                         self._kernb.copy()))
-            # for testing as this kernel seems to always give negative variance.
+            # This code is for debug/testing phase only.
+            # Ideally we should test for negative variance regardless of the AF.
+            # However, we want to recover the original functionality of Osprey, hence the conditional block.
             # TODO remove this.
-            y_var = np.abs(y_var)
-
-            if self._is_var_positive(y_var):
+            if self.acquisition_function['name'] == 'osprey':
                 af = self._acquisition_function(x, y_mean=y_mean, y_var=y_var)
+            elif self.acquisition_function['name'] in ['ei', 'ucb']:
+                # y_var = np.abs(y_var)
+                if self._is_var_positive(y_var):
+                    af = self._acquisition_function(x, y_mean=y_mean, y_var=y_var)
             return (-1)*af
 
         res = minimize(z, init, bounds=self.n_dims*[(0., 1.)],
@@ -281,7 +285,7 @@ def _set_acquisition(self):
         if isinstance(self.acquisition_function, list):
             raise RuntimeError('Must specify only one acquisition function')
         if sorted(self.acquisition_function.keys()) != ['name', 'params']:
-            raise RuntimeError('strategy/params/acquisition must contain keys'
+            raise RuntimeError('strategy/params/acquisition must contain keys '
                                '"name" and "params"')
         if self.acquisition_function['name'] not in ['ei', 'ucb', 'osprey']:
             raise RuntimeError('strategy/params/acquisition name must be one of '
@@ -289,8 +293,8 @@ def _set_acquisition(self):
 
         f = eval('self._'+self.acquisition_function['name'])
 
-        def g(x):
-            return f(x, **self.acquisition_function['params'])
+        def g(x, y_mean, y_var):
+            return f(x, y_mean, y_var, **self.acquisition_function['params'])
 
         self._acquisition_function = g
 

From 9a86c472cb6e8f9d91c70faef6a6356621d530c2 Mon Sep 17 00:00:00 2001
From: Robert Arbon <robert.arbon@gmail.com>
Date: Tue, 13 Jun 2017 11:49:01 +0100
Subject: [PATCH 6/6] Made defaults in strategy PEP8 compliant

---
 osprey/strategies.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/osprey/strategies.py b/osprey/strategies.py
index 2ef50bb..7d66640 100644
--- a/osprey/strategies.py
+++ b/osprey/strategies.py
@@ -202,7 +202,7 @@ def _hyperopt_fmin_random_kwarg(random):
 class GP(BaseStrategy):
     short_name = 'gp'
 
-    def __init__(self, acquisition={'name':'osprey', 'params': {}}, seed=None, seeds=1, max_feval=5E4, max_iter=1E5):
+    def __init__(self, acquisition=None, seed=None, seeds=1, max_feval=5E4, max_iter=1E5):
         self.seed = seed
         self.seeds = seeds
         self.max_feval = max_feval
@@ -213,6 +213,8 @@ def __init__(self, acquisition={'name':'osprey', 'params': {}}, seed=None, seeds
         self._kerns = None
         self._kernf = None
         self._kernb = None
+        if acquisition is None:
+            acquisition = {'name': 'osprey', 'params': {}}
         self.acquisition_function = acquisition
         self._acquisition_function = None
         self._set_acquisition()