From 7320305d40d57e4c5740cf922459044d35700f87 Mon Sep 17 00:00:00 2001
From: Saves Paul
Date: Tue, 30 Jan 2024 10:32:02 +0100
Subject: [PATCH] Clean internal optimization within Kriging (#505)

* test TNC

* cobyla for sgp

* fix for gekpls

* fix gradient algorithm

* pl_exp_3
---
 smt/applications/mfkpls.py         |  7 ++++++
 smt/applications/mfkplsk.py        |  7 ++++++
 smt/applications/mixed_integer.py  |  4 ++--
 smt/applications/tests/test_vfm.py | 24 +++++++++++++++----
 smt/surrogate_models/gekpls.py     |  7 ++++++
 smt/surrogate_models/krg_based.py  | 10 +++++---
 smt/surrogate_models/sgp.py        |  7 ++++++
 .../tests/test_krg_het_noise.py    |  8 ++++++-
 smt/tests/test_array_outputs.py    |  2 +-
 smt/tests/test_kpls_auto.py        |  7 ++----
 10 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/smt/applications/mfkpls.py b/smt/applications/mfkpls.py
index 8fb176bf5..39397f22c 100644
--- a/smt/applications/mfkpls.py
+++ b/smt/applications/mfkpls.py
@@ -34,6 +34,13 @@ def _initialize(self):
             desc="Correlation function type",
             types=(str),
         )
+        declare(
+            "hyper_opt",
+            "Cobyla",
+            values=("Cobyla"),
+            desc="Optimiser for hyperparameters optimisation",
+            types=str,
+        )
         declare("n_comp", 1, types=int, desc="Number of principal components")
         self.name = "MFKPLS"
 
diff --git a/smt/applications/mfkplsk.py b/smt/applications/mfkplsk.py
index b4fee4d7d..85086b49c 100644
--- a/smt/applications/mfkplsk.py
+++ b/smt/applications/mfkplsk.py
@@ -26,6 +26,13 @@ def _initialize(self):
             desc="Correlation function type",
             types=(str),
         )
+        declare(
+            "hyper_opt",
+            "Cobyla",
+            values=("Cobyla"),
+            desc="Optimiser for hyperparameters optimisation",
+            types=str,
+        )
         self.name = "MFKPLSK"
 
     def _componentwise_distance(self, dx, opt=0):
diff --git a/smt/applications/mixed_integer.py b/smt/applications/mixed_integer.py
index fc2587b98..1f7b55c41 100644
--- a/smt/applications/mixed_integer.py
+++ b/smt/applications/mixed_integer.py
@@ -97,7 +97,6 @@ def __init__(
         """
         super().__init__()
         self._surrogate = surrogate
-
         if isinstance(self._surrogate, KrgBased):
             raise ValueError(
                 "Using MixedIntegerSurrogateModel integer model with "
@@ -109,7 +108,6 @@ def __init__(
         self._input_in_folded_space = input_in_folded_space
         self.supports = self._surrogate.supports
         self.options["print_global"] = False
-
         if "poly" in self._surrogate.options:
             if self._surrogate.options["poly"] != "constant":
                 raise ValueError("constant regression must be used with mixed integer")
@@ -198,6 +196,7 @@ def __init__(
                 + " is not supported. Please use MixedIntegerSurrogateModel instead."
            )
         self.options["design_space"] = self._surrogate.design_space
+        self._surrogate.options["hyper_opt"] = "Cobyla"
         self._input_in_folded_space = input_in_folded_space
         self.supports = self._surrogate.supports
 
@@ -333,6 +332,7 @@ def build_kriging_model(self, surrogate):
         Build MixedIntegerKrigingModel from given SMT surrogate model.
         """
         surrogate.options["design_space"] = self._design_space
+        surrogate.options["hyper_opt"] = "Cobyla"
         return MixedIntegerKrigingModel(
             surrogate=surrogate,
             input_in_folded_space=self._work_in_folded_space,
diff --git a/smt/applications/tests/test_vfm.py b/smt/applications/tests/test_vfm.py
index ee099ad67..577bf6874 100644
--- a/smt/applications/tests/test_vfm.py
+++ b/smt/applications/tests/test_vfm.py
@@ -68,7 +68,12 @@ def test_vfm(self):
         Bridge_candidate = "KRG"
         type_bridge = "Multiplicative"
         optionsLF = {}
-        optionsB = {"theta0": [1e-2] * ndim, "print_prediction": False, "deriv": False}
+        optionsB = {
+            "theta0": [1e-2] * ndim,
+            "print_prediction": False,
+            "deriv": False,
+            "hyper_opt": "Cobyla",
+        }
 
         # Construct low/high fidelity data and validation points
         sampling = LHS(xlimits=funLF.xlimits, criterion="m", random_state=42)
@@ -138,7 +143,12 @@ def run_vfm_example(self):
         Bridge_candidate = "KRG"
         type_bridge = "Multiplicative"
         optionsLF = {}
-        optionsB = {"theta0": [1e-2] * ndim, "print_prediction": False, "deriv": False}
+        optionsB = {
+            "theta0": [1e-2] * ndim,
+            "print_prediction": False,
+            "deriv": False,
+            "hyper_opt": "Cobyla",
+        }
 
         # Construct low/high fidelity data and validation points
         sampling = LHS(xlimits=funLF.xlimits, criterion="m")
@@ -200,7 +210,9 @@ def test_KRG_KRG_additive(self):
         yp = M.predict_values(np.atleast_2d(xt[0]))
         dyp = M.predict_derivatives(np.atleast_2d(xt[0]), kx=0)
         self.assert_error(yp, np.array([[0.015368, 0.367424]]), atol=2e-2, rtol=3e-2)
-        self.assert_error(dyp, np.array([[0.07007729, 3.619421]]), atol=3e-1, rtol=1e-2)
+        self.assert_error(
+            dyp, np.array([[-3.11718627e-03, 3.19506239e00]]), atol=3e-1, rtol=1e-2
+        )
 
     def test_QP_KRG_additive(self):
         with Silence():
@@ -214,7 +226,7 @@ def test_QP_KRG_additive(self):
         yp = M.predict_values(np.atleast_2d(xt[0]))
         dyp = M.predict_derivatives(np.atleast_2d(xt[0]), kx=0)
 
         self.assert_error(yp, np.array([[0.015368, 0.367424]]), atol=1e-2, rtol=1e-2)
         self.assert_error(
-            dyp, np.array([[1.16130832e-03, 4.36712162e00]]), atol=3e-1, rtol=1e-2
+            dyp, np.array([[0.02596425, 4.70243162]]), atol=3e-1, rtol=1e-2
         )
 
     def test_KRG_KRG_mult(self):
@@ -228,7 +240,9 @@ def test_KRG_KRG_mult(self):
         yp = M.predict_values(np.atleast_2d(xt[0]))
         dyp = M.predict_derivatives(np.atleast_2d(xt[0]), kx=0)
 
         self.assert_error(yp, np.array([[0.015368, 0.367424]]), atol=2e-2, rtol=3e-2)
-        self.assert_error(dyp, np.array([[0.07007729, 3.619421]]), atol=3e-1, rtol=1e-2)
+        self.assert_error(
+            dyp, np.array([[-3.11718627e-03, 3.19506239e00]]), atol=3e-1, rtol=1e-2
+        )
 
     def test_QP_KRG_mult(self):
         with Silence():
diff --git a/smt/surrogate_models/gekpls.py b/smt/surrogate_models/gekpls.py
index 99e2d6943..adcfffecf 100644
--- a/smt/surrogate_models/gekpls.py
+++ b/smt/surrogate_models/gekpls.py
@@ -32,6 +32,13 @@ def _initialize(self):
             types=int,
             desc="Number of extra points per training point",
         )
+        declare(
+            "hyper_opt",
+            "Cobyla",
+            values=("Cobyla"),
+            desc="Optimiser for hyperparameters optimisation",
+            types=str,
+        )
         self.supports["training_derivatives"] = True
 
     def _check_param(self):
diff --git a/smt/surrogate_models/krg_based.py b/smt/surrogate_models/krg_based.py
index 158baf853..e13f14cdd 100644
--- a/smt/surrogate_models/krg_based.py
+++ b/smt/surrogate_models/krg_based.py
@@ -138,7 +138,7 @@ def _initialize(self):
         )
         declare(
             "hyper_opt",
-            "Cobyla",
+            "TNC",
             values=("Cobyla", "TNC"),
             desc="Optimiser for hyperparameters optimisation",
             types=str,
@@ -1016,7 +1016,7 @@ def _reduced_likelihood_gradient(self, theta):
         gamma = par["gamma"]
         Q = par["Q"]
         G = par["G"]
-        sigma_2 = par["sigma2"]
+        sigma_2 = par["sigma2"] + self.options["nugget"]
         nb_theta = len(theta)
         grad_red = np.zeros(nb_theta)
@@ -1908,6 +1908,10 @@ def grad_minus_reduced_likelihood_function(log10t):
                     optimal_theta_res = optimal_theta_res_loop
 
             elif self.options["hyper_opt"] == "TNC":
+                if self.options["use_het_noise"]:
+                    raise ValueError(
+                        "For heteroscedastic noise, please use Cobyla"
+                    )
                 theta_all_loops = 10**theta_all_loops
                 for theta0_loop in theta_all_loops:
                     optimal_theta_res_loop = optimize.minimize(
@@ -1916,7 +1920,7 @@ def grad_minus_reduced_likelihood_function(log10t):
                         method="TNC",
                         jac=grad_minus_reduced_likelihood_function,
                         bounds=bounds_hyp,
-                        options={"maxiter": 100},
+                        options={"maxfun": 2 * limit},
                     )
                     if optimal_theta_res_loop["fun"] < optimal_theta_res["fun"]:
                         optimal_theta_res = optimal_theta_res_loop
diff --git a/smt/surrogate_models/sgp.py b/smt/surrogate_models/sgp.py
index 728d6b7ae..dd366d7ec 100644
--- a/smt/surrogate_models/sgp.py
+++ b/smt/surrogate_models/sgp.py
@@ -50,6 +50,13 @@ def _initialize(self):
             desc="Gaussian noise on observed training data",
             types=(list, np.ndarray),
         )
+        declare(
+            "hyper_opt",
+            "Cobyla",
+            values=("Cobyla"),
+            desc="Optimiser for hyperparameters optimisation",
+            types=str,
+        )
         declare(
             "eval_noise",
             True,  # for SGP evaluate noise by default
diff --git a/smt/surrogate_models/tests/test_krg_het_noise.py b/smt/surrogate_models/tests/test_krg_het_noise.py
index c06122533..a4c919dfe 100644
--- a/smt/surrogate_models/tests/test_krg_het_noise.py
+++ b/smt/surrogate_models/tests/test_krg_het_noise.py
@@ -23,7 +23,13 @@ def test_predict_output(self):
         xt_full = np.array(3 * xt.tolist())
         yt_full = np.concatenate((yt, yt + 0.2 * yt_std_rand, yt - 0.2 * yt_std_rand))
 
-        sm = KRG(theta0=[1.0], eval_noise=True, use_het_noise=True, n_start=1)
+        sm = KRG(
+            theta0=[1.0],
+            eval_noise=True,
+            use_het_noise=True,
+            n_start=1,
+            hyper_opt="Cobyla",
+        )
         sm.set_training_values(xt_full, yt_full)
         sm.train()
diff --git a/smt/tests/test_array_outputs.py b/smt/tests/test_array_outputs.py
index 5b7c9af2e..88b68ded3 100644
--- a/smt/tests/test_array_outputs.py
+++ b/smt/tests/test_array_outputs.py
@@ -38,7 +38,7 @@ def test_KRG(self):
         d0 = interp.predict_derivatives(np.atleast_2d(xt[10, :]), 0)
 
         self.assert_error(
-            d0, np.array([[0.06874097, 4.366292277996716]]), atol=0.55, rtol=0.15
+            d0, np.array([[0.24897752, 3.72290526]]), atol=0.55, rtol=0.15
         )
 
     def test_RBF(self):
diff --git a/smt/tests/test_kpls_auto.py b/smt/tests/test_kpls_auto.py
index d561d415a..c68b61648 100644
--- a/smt/tests/test_kpls_auto.py
+++ b/smt/tests/test_kpls_auto.py
@@ -46,12 +46,9 @@ def setUp(self):
         n_comp_opt["Branin"] = 2
         n_comp_opt["Rosenbrock"] = 1
         n_comp_opt["sphere"] = 1
-        if platform.startswith("linux"):  # result depends on platform
-            n_comp_opt["exp"] = 2
-        else:
-            n_comp_opt["exp"] = 3
+        n_comp_opt["exp"] = 3
         n_comp_opt["tanh"] = 1
-        n_comp_opt["cos"] = 1
+        n_comp_opt["cos"] = 2
 
         self.nt = nt
         self.ne = ne
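
Note for reviewers: the user-visible knob in this patch is the hyper_opt option. Plain KRG now defaults to the gradient-based TNC optimizer (with the analytic likelihood gradient as jacobian and a "maxfun": 2 * limit evaluation budget), while MFKPLS, MFKPLSK, GEKPLS, SGP and the mixed-integer wrappers are pinned to Cobyla. A minimal sketch of selecting the optimizer explicitly; the 1-D training data and option values below are illustrative, not taken from the patch:

import numpy as np
from smt.surrogate_models import KRG

# Toy 1-D data, purely illustrative.
xt = np.linspace(0.0, 4.0, 10).reshape(-1, 1)
yt = np.sin(xt)

# After this patch, "TNC" is the KRG default; passing it explicitly
# just makes the choice visible.
sm_tnc = KRG(theta0=[1e-2], hyper_opt="TNC", print_global=False)
sm_tnc.set_training_values(xt, yt)
sm_tnc.train()

# Cobyla remains available, and is what the wrappers above now enforce.
sm_cobyla = KRG(theta0=[1e-2], hyper_opt="Cobyla", print_global=False)
sm_cobyla.set_training_values(xt, yt)
sm_cobyla.train()

print(sm_tnc.predict_values(np.array([[2.5]])))
print(sm_cobyla.predict_values(np.array([[2.5]])))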
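
The guard added in krg_based.py makes the TNC branch reject heteroscedastic noise, which is why test_krg_het_noise.py now passes hyper_opt="Cobyla" explicitly. A sketch of the failure mode under the new default; the duplicated 1-D points (needed so the heteroscedastic path can estimate per-point noise) are illustrative:

import numpy as np
from smt.surrogate_models import KRG

xt = np.array([0.0, 1.0, 2.0, 0.0, 1.0, 2.0]).reshape(-1, 1)
yt = np.array([0.0, 1.1, 0.9, 0.1, 0.9, 1.0]).reshape(-1, 1)

sm = KRG(theta0=[1.0], eval_noise=True, use_het_noise=True, hyper_opt="TNC")
sm.set_training_values(xt, yt)
try:
    sm.train()  # reaches the TNC branch and hits the new guard
except ValueError as err:
    print(err)  # expected: "For heteroscedastic noise, please use Cobyla"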