
Commit

Anisotropic kernel for the multi-fidelity co-Kriging model (MFCK) (SMTorg#692)

* Adding anisotropic kernel computation and a regularization option in the likelihood

* Fixing test

* Updates: removed blank lines, checked with ruff, added a test

* Save changes to SMT_MFCK_tutorial.ipynb

* Organizing to merge branches

* Formatting files with ruff

* Formatting files with ruff.

* Update Cobyla parameters

* Clean up
mcastanoUQ authored Dec 20, 2024
1 parent be8d4a6 commit 27feb83
Showing 2 changed files with 113 additions and 55 deletions.
166 changes: 112 additions & 54 deletions smt/applications/mfck.py
@@ -20,6 +20,7 @@
from smt.sampling_methods import LHS
from smt.utils.kriging import differences, componentwise_distance
from smt.surrogate_models.krg_based import KrgBased
from smt.utils.misc import standardization


class MFCK(KrgBased):
@@ -50,10 +51,16 @@ def _initialize(self):
)
declare(
"sigma_bounds",
[1e-1, 100],
[1e-2, 100],
types=(list, np.ndarray),
desc="Bounds for the variance parameter",
)
declare(
"lambda",
0.1,
types=(float),
desc="Regularization parameter",
)

self.options["nugget"] = (
1e-9  # Increasing the nugget for numerical stability reasons
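
The new "lambda" option adds an L2 penalty on the hyperparameters to the negative log-likelihood (see neg_log_likelihood below). A minimal usage sketch, assuming the SMT application API with the option names declared above; the commented data set-up lines are illustrative only:

from smt.applications.mfck import MFCK

sm = MFCK(theta0=[1.0, 1.0])  # one initial length scale per input dimension
sm.options["lambda"] = 0.1  # weight of the L2 regularization term
sm.options["sigma_bounds"] = [1e-2, 100]
# sm.set_training_values(xt_lf, yt_lf, name=0)  # low-fidelity data (illustrative)
# sm.set_training_values(xt_hf, yt_hf)  # highest-fidelity data
# sm.train()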
@@ -83,29 +90,48 @@ def train(self):
self.X = xt
self.y = np.vstack(yt)
self._check_param()
self.nx = 1 # Forcing this in order to consider isotropic kernels (i.e., only one lengthscale)

(
_,
_,
self.X_offset,
self.y_mean,
self.X_scale,
self.y_std,
) = standardization(np.concatenate(xt, axis=0), np.concatenate(yt, axis=0))

self.X_norma_all = [(x - self.X_offset) / self.X_scale for x in xt]
self.y_norma_all = np.vstack([(f - self.y_mean) / self.y_std for f in yt])
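# Note: `standardization` (imported above from smt.utils.misc) returns the
# stacked, normalized data together with the offsets and scales unpacked
# here; X_norma_all / y_norma_all then hold each fidelity level centered
# and scaled, so the hyperparameters are fitted on zero-mean, unit-variance
# data and predictions are mapped back via y_mean / y_std.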

if self.lvl == 1:
# For a single level, initialize theta_ini, lower_bounds, and
# upper_bounds with consistent shapes
theta_ini = np.hstack(
(self.options["sigma0"], self.options["theta0"][0])
) # Kernel variance + theta0
(self.options["sigma0"], self.options["theta0"])
) # Variance + initial theta values
lower_bounds = np.hstack(
(self.options["sigma_bounds"][0], self.options["theta_bounds"][0])
(
self.options["sigma_bounds"][0],
np.full(self.nx, self.options["theta_bounds"][0]),
)
)
upper_bounds = np.hstack(
(self.options["sigma_bounds"][1], self.options["theta_bounds"][1])
(
self.options["sigma_bounds"][1],
np.full(self.nx, self.options["theta_bounds"][1]),
)
)
theta_ini = np.log10(theta_ini)
lower_bounds = np.log10(lower_bounds)
upper_bounds = np.log10(upper_bounds)
x_opt = theta_ini
# Apply log10 to theta_ini and bounds
nb_params = len(self.options["theta0"])
theta_ini[: nb_params + 1] = np.log10(theta_ini[: nb_params + 1])
lower_bounds[: nb_params + 1] = np.log10(lower_bounds[: nb_params + 1])
upper_bounds[: nb_params + 1] = np.log10(upper_bounds[: nb_params + 1])
else:
for lvl in range(self.lvl):
if lvl == 0:
# Initialize theta_ini for level 0
theta_ini = np.hstack(
(self.options["sigma0"], self.options["theta0"][0])
(self.options["sigma0"], self.options["theta0"])
) # Variance + initial theta values
lower_bounds = np.hstack(
(
@@ -131,9 +157,7 @@ def train(self):

elif lvl > 0:
# For additional levels, append to theta_ini, lower_bounds, and upper_bounds
thetat = np.hstack(
(self.options["sigma0"], self.options["theta0"][0])
)
thetat = np.hstack((self.options["sigma0"], self.options["theta0"]))
lower_boundst = np.hstack(
(
self.options["sigma_bounds"][0],
@@ -188,9 +212,9 @@ def train(self):
method="COBYLA",
constraints=[{"fun": con, "type": "ineq"} for con in constraints],
options={
"rhobeg": 0.1,
"rhobeg": 0.5,
"tol": 1e-4,
"maxiter": 200,
"maxiter": 100,
},
)
x_opt_iter = optimal_theta_res_loop.x
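
For reference, a self-contained sketch of the COBYLA call pattern used here; rhobeg is the initial trust-region step size, so the larger value explores more aggressively at the start, while maxiter caps the iteration count. The quadratic objective below is a stand-in, not the MFCK likelihood:

import numpy as np
from scipy.optimize import minimize

def fun(x):
    return float(np.sum(x**2))  # stand-in objective

res = minimize(
    fun,
    np.ones(3),  # stand-in starting point
    method="COBYLA",
    options={"rhobeg": 0.5, "tol": 1e-4, "maxiter": 100},
)
print(res.x)  # close to the origin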
@@ -213,18 +237,24 @@ def train(self):
opt.set_lower_bounds(lower_bounds) # Lower bounds for each dimension
opt.set_upper_bounds(upper_bounds) # Upper bounds for each dimension
opt.set_min_objective(self.neg_log_likelihood_nlopt)
opt.set_maxeval(5000)
opt.set_maxeval(1000)
opt.set_xtol_rel(1e-6)
x0 = np.copy(theta_ini)
x_opt = opt.optimize(x0)
else:
raise ValueError(
f"The optimizer {self.options['hyper_opt']} is not available"
)

x_opt[0:2] = 10 ** (x_opt[0:2])
x_opt[2::3] = 10 ** (x_opt[2::3])
x_opt[3::3] = 10 ** (x_opt[3::3])
x_opt[0] = 10 ** (x_opt[0]) # Apply 10** to Sigma 0
x_opt[1 : self.nx + 1] = (
10 ** (x_opt[1 : self.nx + 1])
) # Apply 10** to length scales 0
x_opt[self.nx + 1 :: self.nx + 2] = (
10 ** (x_opt[self.nx + 1 :: self.nx + 2])
) # Apply 10** to sigmas gamma

for i in np.arange(self.nx + 2, x_opt.shape[0] - 1, self.nx + 2):
x_opt[i : i + self.nx] = 10 ** x_opt[i : i + self.nx]
self.optimal_theta = x_opt

def eta(self, j, jp, rho):
@@ -253,10 +283,16 @@ def compute_cross_K(self, x, xp, L, Lp, param):
"""
cov_value = 0.0

param0 = param[0:2]
sigmas_gamma = param[2::3]
ls_gamma = param[3::3]
rho_values = param[4::3]
sigma_0 = param[0]
l_0 = param[1 : self.nx + 1]
# param0 = param[0 : self.nx+1]
sigmas_gamma = param[self.nx + 1 :: self.nx + 2]
l_s = [
param[i : i + self.nx].tolist()
for i in np.arange(self.nx + 2, param.shape[0] - 1, self.nx + 2)
]
# ls_gamma = param[3::3]
rho_values = param[2 + 2 * self.nx :: self.nx + 2]

# Sum over j = 0, ..., L'
for j in range(Lp + 1):
Expand All @@ -265,12 +301,10 @@ def compute_cross_K(self, x, xp, L, Lp, param):

if j == 0:
# Cov(γ_j(x), γ_j(x')) using the kernel for K_00
cov_gamma_j = self._compute_K(x, xp, param0)
cov_gamma_j = self._compute_K(x, xp, [sigma_0, l_0])
else:
# Cov(γ_j(x), γ_j(x')) using the kernel
cov_gamma_j = self._compute_K(
x, xp, [sigmas_gamma[j - 1], ls_gamma[j - 1]]
)
cov_gamma_j = self._compute_K(x, xp, [sigmas_gamma[j - 1], l_s[j - 1]])
# Add to the value of the covariance
cov_value += eta_j_l * eta_j_lp * cov_gamma_j
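
The slicing above assumes a flat hyperparameter vector laid out as [sigma_0, l_0 (nx values)] followed by one block of [sigma_gamma, l_gamma (nx values), rho] per additional fidelity level. A sketch of that unpacking under this assumed layout (a hypothetical helper, not part of the module):

import numpy as np

def unpack_mfck_params(param, nx, lvl):
    """Hypothetical unpacking of the flat MFCK hyperparameter vector."""
    sigma_0 = param[0]
    l_0 = param[1 : nx + 1]
    blocks = np.asarray(param[nx + 1 :]).reshape(lvl - 1, nx + 2)
    sigmas_gamma = blocks[:, 0]  # one variance per correction process
    l_gamma = blocks[:, 1 : nx + 1]  # one anisotropic length-scale set each
    rho = blocks[:, nx + 1]  # one scaling factor per level transition
    return sigma_0, l_0, sigmas_gamma, l_gamma, rho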

@@ -290,23 +324,30 @@ def predict_all_levels(self, x):
covariances: (list, np.array)
Returns the conditional covariance matrices per level.
"""
self.K = self.compute_blockwise_K(self.optimal_theta)
means = []
covariances = []
x = (x - self.X_offset) / self.X_scale

if self.lvl == 1:
k_XX = self._compute_K(self.X[0], self.X[0], self.optimal_theta[0:2])
k_xX = self._compute_K(x, self.X[0], self.optimal_theta[0:2])
k_xx = self._compute_K(x, x, self.optimal_theta[0:2])
# To be adapted using the Cholesky decomposition
k_XX_inv = np.linalg.inv(
k_XX + self.options["nugget"] * np.eye(k_XX.shape[0])
sigma = self.optimal_theta[0]  # optimal variance sigma_0 (already back-transformed)
l_s = [self.optimal_theta[1 : self.nx + 1]]  # optimal length scales l_0

k_XX = self._compute_K(
self.X_norma_all[0], self.X_norma_all[0], [sigma, l_s[0]]
)
means.append(np.dot(k_xX, np.matmul(k_XX_inv, self.y)))
covariances.append(
k_xx - np.matmul(k_xX, np.matmul(k_XX_inv, k_xX.transpose()))
k_xX = self._compute_K(x, self.X_norma_all[0], [sigma, l_s[0]])
k_xx = self._compute_K(x, x, [sigma, l_s[0]])

L = np.linalg.cholesky(
k_XX + self.options["nugget"] * np.eye(k_XX.shape[0])
)

beta1 = solve_triangular(L, k_xX.T, lower=True)
alpha1 = solve_triangular(L, self.y_norma_all, lower=True)
means.append(self.y_std * np.dot(beta1.T, alpha1) + self.y_mean)
covariances.append(k_xx - np.dot(beta1.T, beta1))
else:
self.K = self.compute_blockwise_K(self.optimal_theta)
L = np.linalg.cholesky(
self.K + self.options["nugget"] * np.eye(self.K.shape[0])
)
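
The explicit matrix inverse of the old code is gone: both branches now factor the covariance once with a Cholesky decomposition and reuse the factor for the mean and the covariance. Schematically, on standardized data (a toy standalone sketch, not the SMT implementation):

import numpy as np
from scipy.linalg import solve_triangular

def gp_predict(K_XX, k_xX, k_xx, y, nugget=1e-9):
    # K_XX + nugget*I = L L^T; then mean = k_xX K^-1 y, cov = k_xx - k_xX K^-1 k_xX^T
    L = np.linalg.cholesky(K_XX + nugget * np.eye(K_XX.shape[0]))
    beta = solve_triangular(L, k_xX.T, lower=True)  # L^-1 k(X, x)
    alpha = solve_triangular(L, y, lower=True)  # L^-1 y
    return beta.T @ alpha, k_xx - beta.T @ beta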
@@ -317,19 +358,19 @@ def predict_all_levels(self, x):
if ind >= j:
k_xX.append(
self.compute_cross_K(
self.X[j], x, ind, j, self.optimal_theta
self.X_norma_all[j], x, ind, j, self.optimal_theta
)
)
else:
k_xX.append(
self.compute_cross_K(
self.X[j], x, j, ind, self.optimal_theta
self.X_norma_all[j], x, j, ind, self.optimal_theta
)
)

beta1 = solve_triangular(L, np.vstack(k_xX), lower=True)
alpha1 = solve_triangular(L, self.y, lower=True)
means.append(np.dot(beta1.T, alpha1))
alpha1 = solve_triangular(L, self.y_norma_all, lower=True)
means.append(self.y_std * np.dot(beta1.T, alpha1) + self.y_mean)
covariances.append(k_xx - np.dot(beta1.T, beta1))
k_xX.clear()

@@ -355,15 +396,18 @@ def _predict(self, x):

def neg_log_likelihood(self, param, grad=None):
if self.lvl == 1:
self.K = self._compute_K(self.X[0], self.X[0], param[0:2])
sigma = param[0]
l_s = [param[1 : self.nx + 1]]
self.K = self._compute_K(self.X[0], self.X[0], [sigma, l_s])
else:
self.K = self.compute_blockwise_K(param)

reg_term = self.options["lambda"] * np.sum(np.power(param, 2))
L = np.linalg.cholesky(
self.K + self.options["nugget"] * np.eye(self.K.shape[0])
)
beta = solve_triangular(L, self.y, lower=True)
NMLL = 1 / 2 * (2 * np.sum(np.log(np.diag(L))) + np.dot(beta.T, beta))
beta = solve_triangular(L, self.y_norma_all, lower=True)
NMLL = np.sum(np.log(np.diag(L))) + np.dot(beta.T, beta) + reg_term
(nmll,) = NMLL[0]
return nmll
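
Compared with the old objective, the 1/2 factor is dropped and the penalty is added; up to constants that do not change the minimizer, the new objective is NMLL(theta) = sum_i log(L_ii) + ||L^-1 y||^2 + lambda * sum_j theta_j^2, where K(theta) + nugget*I = L L^T.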

@@ -372,19 +416,33 @@ def neg_log_likelihood_scipy(self, param):
Likelihood for Cobyla-scipy (SMT) optimizer
"""
param = np.array(param, copy=True)
param[0:2] = 10 ** (param[0:2])
param[2::3] = 10 ** (param[2::3])
param[3::3] = 10 ** (param[3::3])
param[0] = 10 ** (param[0]) # Apply 10** to Sigma 0
param[1 : self.nx + 1] = (
10 ** (param[1 : self.nx + 1])
) # Apply 10** to length scales 0
param[self.nx + 1 :: self.nx + 2] = (
10 ** (param[self.nx + 1 :: self.nx + 2])
) # Apply 10** to sigmas gamma

for i in np.arange(self.nx + 2, param.shape[0] - 1, self.nx + 2):
param[i : i + self.nx] = 10 ** param[i : i + self.nx]
return self.neg_log_likelihood(param)

def neg_log_likelihood_nlopt(self, param, grad=None):
"""
Likelihood for nlopt optimizers
"""
param = np.array(param, copy=True)
param[0:2] = 10 ** (param[0:2])
param[2::3] = 10 ** (param[2::3])
param[3::3] = 10 ** (param[3::3])
param[0] = 10 ** (param[0]) # Apply 10** to Sigma 0
param[1 : self.nx + 1] = (
10 ** (param[1 : self.nx + 1])
) # Apply 10** to length scales 0
param[self.nx + 1 :: self.nx + 2] = (
10 ** (param[self.nx + 1 :: self.nx + 2])
) # Apply 10** to sigmas gamma

for i in np.arange(self.nx + 2, param.shape[0] - 1, self.nx + 2):
param[i : i + self.nx] = 10 ** param[i : i + self.nx]
return self.neg_log_likelihood(param, grad)
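
Both wrappers apply the same log10 back-transform before delegating to neg_log_likelihood; a possible shared helper (a refactoring sketch, not in the commit):

import numpy as np

def unlog_params(param, nx):
    """Map log10-scaled hyperparameters back to their natural scale."""
    param = np.array(param, copy=True)
    param[0] = 10 ** param[0]  # sigma_0
    param[1 : nx + 1] = 10 ** param[1 : nx + 1]  # length scales l_0
    param[nx + 1 :: nx + 2] = 10 ** param[nx + 1 :: nx + 2]  # sigmas gamma
    for i in np.arange(nx + 2, param.shape[0] - 1, nx + 2):
        param[i : i + nx] = 10 ** param[i : i + nx]  # length scales l_gamma
    return param  # the rho entries stay on their original scale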

@@ -394,7 +452,7 @@ def compute_blockwise_K(self, param):
for j in range(self.lvl):
if jp >= j:
K_block[str(jp) + str(j)] = self.compute_cross_K(
self.X[j], self.X[jp], jp, j, param
self.X_norma_all[j], self.X_norma_all[jp], jp, j, param
)

K = np.zeros((n, n))
Expand Down Expand Up @@ -427,7 +485,7 @@ def _compute_K(self, A: np.ndarray, B: np.ndarray, param):
self.X[0].shape[1],
power=self.options["pow_exp_power"],
)
self.corr.theta = np.full(self.X[0].shape[1], param[1])
self.corr.theta = np.asarray(param[1])
r = self.corr(d)
R = r.reshape(A.shape[0], B.shape[0])
K = param[0] * R
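
With self.corr.theta now a full vector, every input dimension carries its own length scale, which is what makes the kernel anisotropic. A toy anisotropic squared-exponential for illustration (assumed form; SMT's corr objects compute this internally from componentwise distances):

import numpy as np

def aniso_sq_exp(A, B, sigma2, theta):
    # r(x, x') = exp(-sum_l theta_l * (x_l - x'_l)^2), scaled by the variance sigma2
    theta = np.asarray(theta)
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2 * theta).sum(axis=-1)
    return sigma2 * np.exp(-d2)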
2 changes: 1 addition & 1 deletion smt/applications/tests/test_mfck.py
@@ -69,7 +69,7 @@ def test_mfck(self):

t_error = num / den

self.assert_error(t_error, 0.0, 1e-6, 1e-6)
self.assert_error(t_error, 0.0, 1e-5, 1e-5)

@staticmethod
def run_mfck_example():
