From b0b1ab61ca2c866308941019a02f6900fd5f30ab Mon Sep 17 00:00:00 2001
From: relf
Date: Wed, 20 Sep 2023 10:13:22 +0200
Subject: [PATCH] Refactor and add comments

Co-authored-by: hvalayer
---
 smt/surrogate_models/sgp.py | 164 ++++++++++++++++++------------------
 1 file changed, 83 insertions(+), 81 deletions(-)

diff --git a/smt/surrogate_models/sgp.py b/smt/surrogate_models/sgp.py
index 21298a094..cb294d60a 100644
--- a/smt/surrogate_models/sgp.py
+++ b/smt/surrogate_models/sgp.py
@@ -83,20 +83,6 @@ def _initialize(self):
         self.optimal_par = {}
         self.optimal_noise = None
 
-    def compute_K(self, A: np.ndarray, B: np.ndarray, theta, sigma2):
-        """
-        Compute the covariance matrix K between A and B
-        """
-        # Compute pairwise componentwise L1-distances between A and B
-        dx = differences(A, B)
-        d = self._componentwise_distance(dx)
-        # Compute the correlation vector r and matrix R
-        r = self._correlation_types[self.options["corr"]](theta, d)
-        R = r.reshape(A.shape[0], B.shape[0])
-        # Compute the covariance matrix K
-        K = sigma2 * R
-        return K
-
     def set_inducing_inputs(self, Z=None, normalize=False):
         """
         Define number of inducing inputs or set the locations manually.
@@ -137,6 +123,7 @@ def set_inducing_inputs(self, Z=None, normalize=False):
                 self.normalize = False
             self.nz = Z.shape[0]
 
+    # overload kriging based implementation
     def _new_train(self):
         if self.Z is None:
             self.set_inducing_inputs()
@@ -154,11 +141,8 @@ def _new_train(self):
 
         return super()._new_train()
 
-    # overload kriging based imlementation
+    # overload kriging based implementation
     def _reduced_likelihood_function(self, theta):
-        # print("variance=", theta[-1])
-        # print("lengthscale=", 1.0 / np.sqrt(2.0 * theta[0:-1]))
-        # print("theta=", theta[0:-1])
         X = self.training_points[None][0][0]
         Y = self.training_points[None][0][1]
         Z = self.Z
@@ -183,18 +167,34 @@ def _reduced_likelihood_function(self, theta):
             "theta": theta,
             "sigma2": sigma2,
         }
-        # print(">>>>>>> MLL=", likelihood)
+
         return likelihood, params
 
+    def _compute_K(self, A: np.ndarray, B: np.ndarray, theta, sigma2):
+        """
+        Compute the covariance matrix K between A and B
+        """
+        # Compute pairwise componentwise L1-distances between A and B
+        dx = differences(A, B)
+        d = self._componentwise_distance(dx)
+        # Compute the correlation vector r and matrix R
+        r = self._correlation_types[self.options["corr"]](theta, d)
+        R = r.reshape(A.shape[0], B.shape[0])
+        # Compute the covariance matrix K
+        K = sigma2 * R
+        return K
+
     def _fitc(self, X, Y, Z, theta, sigma2, nugget):
-        """FITC method implementation.
+        """
+        FITC method implementation.
+
         See also https://github.com/SheffieldML/GPy/blob/9ec3e50e3b96a10db58175a206ed998ec5a8e3e3/GPy/inference/latent_function_inference/fitc.py
         """
         # Compute: diag(Knn), Kmm and Kmn
         Knn = np.full(self.nt, sigma2)
-        Kmm = self.compute_K(Z, Z, theta, sigma2) + np.eye(self.nz) * nugget
-        Kmn = self.compute_K(Z, X, theta, sigma2)
+        Kmm = self._compute_K(Z, Z, theta, sigma2) + np.eye(self.nz) * nugget
+        Kmn = self._compute_K(Z, X, theta, sigma2)
 
         # Compute (lower) Cholesky decomposition: Kmm = U U^T
         U = linalg.cholesky(Kmm, lower=True)
@@ -216,16 +216,10 @@ def _fitc(self, X, Y, Z, theta, sigma2, nugget):
         L = linalg.cholesky(A, lower=True)
         Li = linalg.inv(L)
 
-        # back substitute to get b, P, v
+        # Compute a and b
         a = np.einsum("ij,i->ij", Y, beta)  # avoid reshape for mat-vec multiplication
         b = Li @ V @ a
 
-        # For prediction
-        LiUi = Li @ Ui
-        LiUiT = LiUi.T
-        woodbury_vec = LiUiT @ b
-        woodbury_inv = Ui.T @ Ui - LiUiT @ LiUi
-
         # Compute marginal log-likelihood
         likelihood = -0.5 * (
             # num_data * np.log(2.0 * np.pi)  # constant term ignored in reduced likelihood
@@ -235,82 +229,90 @@ def _fitc(self, X, Y, Z, theta, sigma2, nugget):
             - np.einsum("ij,ij->", b, b)
         )
 
+        # Store Woodbury vectors for prediction step
+        LiUi = Li @ Ui
+        LiUiT = LiUi.T
+        woodbury_vec = LiUiT @ b
+        woodbury_inv = Ui.T @ Ui - LiUiT @ LiUi
+
         return likelihood, woodbury_vec, woodbury_inv
 
     def _vfe(self, X, Y, Z, theta, sigma2, nugget):
-        """VFE method implementation.
-        See also https://github.com/SheffieldML/GPy/blob/9ec3e50e3b96a10db58175a206ed998ec5a8e3e3/GPy/inference/latent_function_inference/fitc.py """
+        """
+        VFE method implementation.
 
-        # model constants
-        num_data, output_dim = Y.shape
+        See also https://github.com/SheffieldML/GPy/blob/9ec3e50e3b96a10db58175a206ed998ec5a8e3e3/GPy/inference/latent_function_inference/fitc.py
+        """
 
         # Assume zero mean function
         mean = 0
-        # Assume Gaussian likelihood (precision is equivalent to beta in FITC)
-        precision = 1.0 / np.fmax(self.options["noise0"], nugget)
+        Y -= mean
 
-        # store some matrices
-        VVT_factor = precision * (Y - mean)
-        trYYT = np.einsum("ij,ij->", Y - mean, Y - mean)
+        # Compute Kmm and Kmn
+        Kmm = self._compute_K(Z, Z, theta, sigma2) + np.eye(self.nz) * nugget
+        Kmn = self._compute_K(Z, X, theta, sigma2)
 
-        # kernel computations, using BGPLVM notation (psi stats)
-        Kmm = self.compute_K(Z, Z, theta, sigma2) + np.eye(self.nz) * nugget
-        Lm = linalg.cholesky(Kmm, lower=True)
+        # Compute (lower) Cholesky decomposition: Kmm = U U^T
+        U = linalg.cholesky(Kmm, lower=True)
+
+        # Compute (upper) Cholesky decomposition: Qnn = V^T V
+        Ui = linalg.inv(U)
+        V = Ui @ Kmn
 
-        psi0 = np.full(self.nt, sigma2)
-        psi1 = self.compute_K(X, Z, theta, sigma2)
+        # Compute beta, the effective noise precision
+        beta = 1.0 / np.fmax(self.options["noise0"], nugget)
 
-        tmp = psi1 * (np.sqrt(precision))
-        LmInv = linalg.inv(Lm)
-        tmp = LmInv @ tmp.T
-        A = tmp @ tmp.T
+        # Compute A = beta * V @ V.T
+        A = beta * V @ V.T
 
+        # Compute (lower) Cholesky decomposition: B = I + A = L L^T
         B = np.eye(self.nz) + A
-        LB = linalg.cholesky(B, lower=True)
-        LBInv = linalg.inv(LB)
-
-        tmp = LmInv @ psi1.T
-        LBi_Lmi_psi1 = LBInv @ tmp
-        LBi_Lmi_psi1Vf = LBi_Lmi_psi1 @ VVT_factor
-        tmp = LBInv.T @ LBi_Lmi_psi1Vf
-        Cpsi1Vf = LmInv.T @ tmp
-
-        delit = LBi_Lmi_psi1Vf @ LBi_Lmi_psi1Vf.T
-        data_fit = np.trace(delit)
-
-        beta = precision
-        lik_1 = (
-            -0.5 * num_data * output_dim * (np.log(2.0 * np.pi) - np.log(beta))
-            - 0.5 * beta * trYYT
-        )
-        lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
-        lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
-        lik_4 = 0.5 * data_fit
-        likelihood = lik_1 + lik_2 + lik_3 + lik_4
+        L = linalg.cholesky(B, lower=True)
+        Li = linalg.inv(L)
 
-        woodbury_vec = Cpsi1Vf
+        # Compute b
+        b = beta * Li @ V @ Y
 
-        Bi = LBInv.T @ LBInv + np.eye(LBInv.shape[0])
-        woodbury_inv = LmInv.T @ Bi @ LmInv
+        # Compute log-marginal likelihood
+        likelihood = -0.5 * (
+            # self.nt * np.log(2.0 * np.pi)  # constant term ignored in reduced likelihood
+            -self.nt * np.log(beta)
+            + 2.0 * np.sum(np.log(np.diag(L)))
+            + beta * np.einsum("ij,ij->", Y, Y)
+            - b.T @ b
+            + self.nt * beta * sigma2
+            - np.trace(A)
+        )
+
+        # Store Woodbury vectors for prediction step
+        LiUi = Li @ Ui
+        Bi = np.eye(self.nz) + Li.T @ Li
+        woodbury_vec = LiUi.T @ b
+        woodbury_inv = Ui.T @ Bi @ Ui
 
         return likelihood, woodbury_vec, woodbury_inv
 
+    # overload kriging based implementation
     def _predict_values(self, x: np.ndarray, is_acting=None) -> np.ndarray:
-        Kx, _ = self._compute_KxKxx(x)
-        mu = Kx.T @ self.woodbury_data["vec"]
+        """
+        Evaluates the model at a set of points using the Woodbury vector.
+        """
+        Kx = self._compute_K(
+            x, self.Z, self.optimal_par["theta"], self.optimal_par["sigma2"]
+        )
+        mu = Kx @ self.woodbury_data["vec"]
         return mu
 
+    # overload kriging based implementation
     def _predict_variances(self, x: np.ndarray, is_acting=None) -> np.ndarray:
-        Kx, Kxx = self._compute_KxKxx(x)
+        """
+        Evaluates the model variance at a set of points using the Woodbury inverse matrix.
+        """
+        Kx = self._compute_K(
+            self.Z, x, self.optimal_par["theta"], self.optimal_par["sigma2"]
+        )
+        Kxx = np.full(x.shape[0], self.optimal_par["sigma2"])
         var = (Kxx - np.sum(np.dot(self.woodbury_data["inv"].T, Kx) * Kx, 0))[:, None]
         var = np.clip(var, 1e-15, np.inf)
         var += self.options["noise0"]
         return var
-
-    def _compute_KxKxx(self, x):
-        Kx = self.compute_K(
-            self.Z, x, self.optimal_par["theta"], self.optimal_par["sigma2"]
-        )
-        Kxx = np.full(x.shape[0], self.optimal_par["sigma2"])
-        return Kx, Kxx
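
Note (not part of the patch): a minimal usage sketch of how the refactored SGP surrogate might be exercised end to end. It assumes the SMT SGP class exposes set_training_values, train, predict_values and predict_variances like other SMT surrogates, and that inducing points are set through the set_inducing_inputs method touched above; the method="VFE" option name is an assumption and may differ in the actual class.

    import numpy as np
    from smt.surrogate_models import SGP

    # 1-D toy data set
    rng = np.random.RandomState(42)
    xt = np.linspace(0.0, 4.0, 50).reshape(-1, 1)
    yt = np.sin(xt) + 0.05 * rng.randn(*xt.shape)

    # 10 manually chosen inducing inputs spread over the domain
    Z = np.linspace(0.0, 4.0, 10).reshape(-1, 1)

    sgp = SGP(method="VFE")  # assumed option name; "FITC" would route to _fitc instead of _vfe
    sgp.set_training_values(xt, yt)
    sgp.set_inducing_inputs(Z=Z)  # inducing point locations set manually, as supported by the patch
    sgp.train()

    xe = np.linspace(0.0, 4.0, 100).reshape(-1, 1)
    mean = sgp.predict_values(xe)    # internally Kx @ woodbury_vec
    var = sgp.predict_variances(xe)  # internally uses the Woodbury inverse

Both prediction calls go through the overloaded _predict_values / _predict_variances above, so the Woodbury quantities stored by _fitc or _vfe are the only training artifacts the sketch relies on.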