diff --git a/doc/_src_docs/surrogate_models.rst b/doc/_src_docs/surrogate_models.rst
index 324c4c5ea..11f77ca80 100644
--- a/doc/_src_docs/surrogate_models.rst
+++ b/doc/_src_docs/surrogate_models.rst
@@ -18,6 +18,7 @@ SMT contains the surrogate modeling methods listed below.
    surrogate_models/gekpls
    surrogate_models/genn
    surrogate_models/mgp
+   surrogate_models/sgp
 
 Usage
diff --git a/doc/_src_docs/surrogate_models.rstx b/doc/_src_docs/surrogate_models.rstx
index 43f8f8c82..6e0b640f4 100644
--- a/doc/_src_docs/surrogate_models.rstx
+++ b/doc/_src_docs/surrogate_models.rstx
@@ -18,6 +18,7 @@ SMT contains the surrogate modeling methods listed below.
    surrogate_models/gekpls
    surrogate_models/genn
    surrogate_models/mgp
+   surrogate_models/sgp
 
 Usage
diff --git a/doc/_src_docs/surrogate_models/sgp.rst b/doc/_src_docs/surrogate_models/sgp.rst
new file mode 100644
index 000000000..07ff82c14
--- /dev/null
+++ b/doc/_src_docs/surrogate_models/sgp.rst
@@ -0,0 +1,371 @@
+Sparse Gaussian Process (SGP)
+=============================
+
+Despite the versatility of Gaussian Process regression models for learning complex data, their computational complexity,
+which is :math:`\mathcal{O}(N^3)` with :math:`N` the number of training points, prevents their use on large datasets.
+This complexity results from the inversion of the covariance matrix :math:`\mathbf{K}`. We must also highlight that the memory
+cost of GPR models is :math:`\mathcal{O}(N^2)`, mainly due to the storage of the covariance matrix itself.
+
+To address these limitations, sparse GP approximation methods have emerged as efficient alternatives.
+Sparse GPs consider a set of inducing points to approximate the posterior Gaussian distribution with a low-rank representation,
+while variational inference provides a framework for approximating the posterior distribution directly.
+Thus, these methods enable accurate modeling of large datasets while preserving computational efficiency
+(typically :math:`\mathcal{O}(NM^2)` time and :math:`\mathcal{O}(NM)` memory for some chosen :math:`M \ll N` inducing points).
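A minimal usage sketch, distilled from the ``test_sgp_fitc`` example added later in this patch, showing the typical call sequence; the objective function, training-set size, noise level and number of inducing points are illustrative choices rather than values prescribed by SMT::

    import numpy as np
    from smt.surrogate_models import SGP

    # noisy observations of a toy 1D function
    rng = np.random.RandomState(0)
    xt = 2 * rng.rand(100, 1) - 1
    yt = np.sin(3 * np.pi * xt) + rng.normal(scale=0.1, size=(100, 1))

    # choose 20 inducing inputs among the training points
    Z = xt[rng.permutation(100)[:20]].copy()

    sgp = SGP(noise0=[0.01])      # FITC is the default sparse method
    sgp.set_training_values(xt, yt)
    sgp.set_inducing_inputs(Z=Z)  # omit Z to pick inducing points randomly
    sgp.train()

    x = np.linspace(-1, 1, 200).reshape(-1, 1)
    mean = sgp.predict_values(x)          # posterior mean
    variance = sgp.predict_variances(x)   # posterior variance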
+Options
+-------
+
+.. list-table:: List of options
+   :header-rows: 1
+
+   *  - Option
+      - Default
+      - Acceptable values
+      - Acceptable types
+      - Description
+   *  - categorical_kernel
+      - ...
+      - [, , , ]
+      - None
+      - The kernel to use for categorical inputs. Only for non continuous Kriging
+   *  - hierarchical_kernel
+      - MixHrcKernelType.ALG_KERNEL
+      - [, ]
+      - None
+      - The kernel to use for mixed hierarchical inputs. Only for non continuous Kriging
+   *  - nugget
+      - 1e-08
+      - None
+      - ['float']
+      - a jitter for numerical stability
+   *  - theta0
+      - [0.01]
+      - None
+      - ['list', 'ndarray']
+      - Initial hyperparameters
+   *  - theta_bounds
+      - [1e-06, 100.0]
+      - None
+      - ['list', 'ndarray']
+      - bounds for hyperparameters
+   *  - hyper_opt
+      - Cobyla
+      - ['Cobyla', 'TNC']
+      - ['str']
+      - Optimiser for hyperparameters optimisation
+   *  - eval_noise
+      - True
+      - [True, False]
+      - ['bool']
+      - Noise is always evaluated
+   *  - noise0
+      - [0.01]
+      - None
+      - ['list', 'ndarray']
+      - Gaussian noise on observed training data
+   *  - noise_bounds
+      - [2.220446049250313e-14, 10000000000.0]
+      - None
+      - ['list', 'ndarray']
+      - bounds for noise hyperparameters
+   *  - use_het_noise
+      - False
+      - [True, False]
+      - ['bool']
+      - heteroscedastic noise evaluation flag
+   *  - n_start
+      - 10
+      - None
+      - ['int']
+      - number of optimizer runs (multistart method)
+   *  - xlimits
+      - None
+      - None
+      - ['list', 'ndarray']
+      - definition of a design space of float (continuous) variables: array-like of size nx x 2 (lower, upper bounds)
+   *  - design_space
+      - None
+      - None
+      - ['BaseDesignSpace', 'list', 'ndarray']
+      - definition of the (hierarchical) design space: use `smt.utils.design_space.DesignSpace` as the main API. Also accepts list of float variable bounds
+   *  - method
+      - FITC
+      - ['FITC', 'VFE']
+      - ['str']
+      - Method used by sparse GP model
+   *  - n_inducing
+      - 10
+      - None
+      - ['int']
+      - Number of inducing inputs
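The SGP-specific options in the table above are ``method`` and ``n_inducing``; a short sketch of how one might select the approximation (the value 30 is an arbitrary illustration, the defaults being FITC and 10)::

    from smt.surrogate_models import SGP

    sgp_fitc = SGP()                            # FITC approximation, 10 inducing inputs
    sgp_vfe = SGP(method="VFE", n_inducing=30)  # VFE approximation with a larger inducing set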
diff --git a/doc/_src_docs/surrogate_models/sgp.rstx b/doc/_src_docs/surrogate_models/sgp.rstx
new file mode 100644
index 000000000..03f9c5f28
--- /dev/null
+++ b/doc/_src_docs/surrogate_models/sgp.rstx
@@ -0,0 +1,76 @@
+Sparse Gaussian Process (SGP)
+=============================
+
+Despite the versatility of Gaussian Process regression models for learning complex data, their computational complexity,
+which is :math:`\mathcal{O}(N^3)` with :math:`N` the number of training points, prevents their use on large datasets.
+This complexity results from the inversion of the covariance matrix :math:`\mathbf{K}`. We must also highlight that the memory
+cost of GPR models is :math:`\mathcal{O}(N^2)`, mainly due to the storage of the covariance matrix itself.
+
+To address these limitations, sparse GP approximation methods have emerged as efficient alternatives.
+Sparse GPs consider a set of inducing points to approximate the posterior Gaussian distribution with a low-rank representation,
+while variational inference provides a framework for approximating the posterior distribution directly.
+Thus, these methods enable accurate modeling of large datasets while preserving computational efficiency
+(typically :math:`\mathcal{O}(NM^2)` time and :math:`\mathcal{O}(NM)` memory for some chosen :math:`M \ll N` inducing points).
diff --git a/smt/surrogate_models/sgp.py b/smt/surrogate_models/sgp.py
-        # print(">>>>>>> lkh=", likelihood)
+        # print(">>>>>>> MLL=", likelihood)
         return likelihood, params
 
     def _fitc(self, X, Y, Z, theta, sigma2, nugget):
@@ -176,8 +203,11 @@ def _fitc(self, X, Y, Z, theta, sigma2, nugget):
         Ui = linalg.inv(U)
         V = Ui @ Kmn
 
+        # Assumption on the gaussian noise on training outputs
+        eta2 = np.array(self.options["noise0"])
+
         # Compute diagonal correction: nu = Knn_diag - Qnn_diag + \eta^2
-        nu = Knn - np.sum(np.square(V), 0) + np.array(self.options["noise0"])
+        nu = Knn - np.sum(np.square(V), 0) + eta2
 
         # Compute beta, the effective noise precision
         beta = 1.0 / nu
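The hunk above adds the observation-noise variance ``eta2`` to the FITC diagonal correction, i.e. ``nu = Knn_diag - Qnn_diag + eta2`` followed by ``beta = 1 / nu``. A self-contained sketch of that correction, where the ``rbf`` kernel, the data sizes and the noise variance are illustrative stand-ins rather than SMT internals::

    import numpy as np

    def rbf(A, B, theta=1.0):
        # isotropic squared-exponential kernel (illustrative choice)
        d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
        return np.exp(-theta * d2)

    rng = np.random.RandomState(0)
    X = rng.rand(50, 1)    # training inputs
    Z = X[:10]             # inducing inputs
    eta2 = 0.01            # assumed Gaussian noise variance on the outputs

    Kmm = rbf(Z, Z) + 1e-8 * np.eye(len(Z))  # jitter for numerical stability
    Kmn = rbf(Z, X)
    Knn_diag = np.ones(len(X))               # k(x, x) = 1 for this kernel

    # low-rank term Qnn = Kmn^T Kmm^{-1} Kmn; only its diagonal is needed
    L = np.linalg.cholesky(Kmm)
    V = np.linalg.solve(L, Kmn)              # Qnn_diag is the column-wise sum of V**2
    Qnn_diag = np.sum(V**2, axis=0)

    # FITC diagonal correction and effective noise precision, as in the hunk above
    nu = Knn_diag - Qnn_diag + eta2
    beta = 1.0 / nu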
diff --git a/smt/surrogate_models/tests/test_surrogate_model_examples.py b/smt/surrogate_models/tests/test_surrogate_model_examples.py
index b6adf44ed..5b892fc36 100644
--- a/smt/surrogate_models/tests/test_surrogate_model_examples.py
+++ b/smt/surrogate_models/tests/test_surrogate_model_examples.py
@@ -620,6 +620,113 @@ def fun(x):
         fig.subplots_adjust(top=0.74)
         plt.show()
 
+    def test_sgp_fitc(self):
+        import numpy as np
+        import matplotlib.pyplot as plt
+
+        from smt.surrogate_models import SGP
+
+        def f_obj(x):
+            import numpy as np
+
+            return (
+                np.sin(3 * np.pi * x)
+                + 0.3 * np.cos(9 * np.pi * x)
+                + 0.5 * np.sin(7 * np.pi * x)
+            )
+
+        # random generator for reproducibility
+        rng = np.random.RandomState(0)
+
+        # Generate training data
+        nt = 200
+        # Variance of the gaussian noise on our training data
+        eta2 = [0.01]
+        gaussian_noise = rng.normal(loc=0.0, scale=np.sqrt(eta2), size=(nt, 1))
+        xt = 2 * rng.rand(nt, 1) - 1
+        yt = f_obj(xt) + gaussian_noise
+
+        # Pick inducing points randomly in training data
+        n_inducing = 30
+        random_idx = rng.permutation(nt)[:n_inducing]
+        Z = xt[random_idx].copy()
+
+        sgp = SGP(noise0=eta2)  # Assume here we have an idea of the variance eta2
+        sgp.set_training_values(xt, yt)
+        sgp.set_inducing_inputs(Z=Z)
+        # sgp.set_inducing_inputs()  # When Z is not specified, inducing points are picked randomly in the training data
+        sgp.train()
+
+        x = np.linspace(-1, 1, nt + 1).reshape(-1, 1)
+        y = f_obj(x)
+        hat_y = sgp.predict_values(x)
+        var = sgp.predict_variances(x)
+
+        # plot prediction
+        plt.figure(figsize=(14, 6))
+        plt.plot(x, y, "C1-", label="target function")
+        plt.scatter(xt, yt, marker="o", s=10, label="observed data")
+        plt.plot(x, hat_y, "k-", label="Sparse GP")
+        plt.plot(x, hat_y - 3 * np.sqrt(var), "k--")
+        plt.plot(x, hat_y + 3 * np.sqrt(var), "k--", label="99% CI")
+        plt.plot(Z, -2.9 * np.ones_like(Z), "r|", mew=2, label="inducing points")
+        plt.ylim([-3, 3])
+        plt.legend(loc=0)
+        plt.show()
+
+    def test_sgp_vfe(self):
+        import numpy as np
+        import matplotlib.pyplot as plt
+
+        from smt.surrogate_models import SGP
+
+        def f_obj(x):
+            import numpy as np
+
+            return (
+                np.sin(3 * np.pi * x)
+                + 0.3 * np.cos(9 * np.pi * x)
+                + 0.5 * np.sin(7 * np.pi * x)
+            )
+
+        # random generator for reproducibility
+        rng = np.random.RandomState(42)
+
+        # Generate training data
+        nt = 200
+        # Variance of the gaussian noise on our training data
+        eta2 = [0.01]
+        gaussian_noise = rng.normal(loc=0.0, scale=np.sqrt(eta2), size=(nt, 1))
+        xt = 2 * rng.rand(nt, 1) - 1
+        yt = f_obj(xt) + gaussian_noise
+
+        # Pick inducing points randomly in training data
+        n_inducing = 30
+        random_idx = rng.permutation(nt)[:n_inducing]
+        Z = xt[random_idx].copy()
+
+        sgp = SGP(noise0=eta2, method="VFE")
+        sgp.set_training_values(xt, yt)
+        sgp.set_inducing_inputs(Z=Z)
+        sgp.train()
+
+        x = np.linspace(-1, 1, nt + 1).reshape(-1, 1)
+        y = f_obj(x)
+        hat_y = sgp.predict_values(x)
+        var = sgp.predict_variances(x)
+
+        # plot prediction
+        plt.figure(figsize=(14, 6))
+        plt.plot(x, y, "C1-", label="target function")
+        plt.scatter(xt, yt, marker="o", s=10, label="observed data")
+        plt.plot(x, hat_y, "k-", label="Sparse GP")
+        plt.plot(x, hat_y - 3 * np.sqrt(var), "k--")
+        plt.plot(x, hat_y + 3 * np.sqrt(var), "k--", label="99% CI")
+        plt.plot(Z, -2.9 * np.ones_like(Z), "r|", mew=2, label="inducing points")
+        plt.ylim([-3, 3])
+        plt.legend(loc=0)
+        plt.show()
+
 
 if __name__ == "__main__":
     unittest.main()
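Not part of the patch, but a possible way to compare the two approximations numerically rather than visually, reusing only the API exercised by the tests above; the grid size, seed and inducing-set size are arbitrary::

    import numpy as np
    from smt.surrogate_models import SGP

    def f_obj(x):
        return np.sin(3 * np.pi * x) + 0.3 * np.cos(9 * np.pi * x) + 0.5 * np.sin(7 * np.pi * x)

    rng = np.random.RandomState(0)
    nt, eta2 = 200, [0.01]
    xt = 2 * rng.rand(nt, 1) - 1
    yt = f_obj(xt) + rng.normal(scale=np.sqrt(eta2), size=(nt, 1))
    Z = xt[rng.permutation(nt)[:30]].copy()

    x = np.linspace(-1, 1, 201).reshape(-1, 1)
    for method in ["FITC", "VFE"]:
        sgp = SGP(noise0=eta2, method=method)
        sgp.set_training_values(xt, yt)
        sgp.set_inducing_inputs(Z=Z)
        sgp.train()
        rmse = float(np.sqrt(np.mean((sgp.predict_values(x) - f_obj(x)) ** 2)))
        print(f"{method}: RMSE = {rmse:.4f}")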