diff --git a/.github/workflows/build_release.yml b/.github/workflows/build_release.yml new file mode 100644 index 000000000..4bcb25546 --- /dev/null +++ b/.github/workflows/build_release.yml @@ -0,0 +1,59 @@ +name: Build and upload to PyPI + +on: + workflow_dispatch: + push: + tags: ['v*'] + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-22.04, windows-2022, macos-11] + + steps: + - uses: actions/checkout@v4 + + - name: Build wheels + uses: pypa/cibuildwheel@v2.16.2 + env: + CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* + CIBW_SKIP: cp*_i686 + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Build sdist + run: pipx run build --sdist + + - uses: actions/upload-artifact@v3 + with: + path: dist/*.tar.gz + + upload_pypi: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v3 + with: + # unpacks default artifact into dist/ + # if `name: artifact` is omitted, the action will create extra parent dir + name: artifact + path: dist + + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true + diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9a3f83606..91e8fa507 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,7 +23,7 @@ jobs: python-version: "3.8" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/tests_coverage.yml b/.github/workflows/tests_coverage.yml index e88908b56..0a254dffc 100644 --- a/.github/workflows/tests_coverage.yml +++ b/.github/workflows/tests_coverage.yml @@ -15,7 +15,7 @@ jobs: python-version: ['3.8'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: diff --git a/.gitignore b/.gitignore index 9b68d1eae..9bb3e784f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ *.pyd *.pyc *~ -*build* +**/*build *.egg-info *.so *.so.* diff --git a/README.md b/README.md index 745f06580..0bedf348f 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ To cite SMT legacy: M. A. Bouhlel and J. T. Hwang and N. Bartoli and R. Lafage a ``` # Required packages -SMT depends on the following modules: numpy, scipy, scikit-learn, pyDOE2 and Cython. +SMT depends on the following modules: numpy, scipy, scikit-learn, pyDOE3 and Cython. # Installation If you want to install the latest release diff --git a/doc/_src_docs/surrogate_models.rst b/doc/_src_docs/surrogate_models.rst index 324c4c5ea..11f77ca80 100644 --- a/doc/_src_docs/surrogate_models.rst +++ b/doc/_src_docs/surrogate_models.rst @@ -18,6 +18,7 @@ SMT contains the surrogate modeling methods listed below. surrogate_models/gekpls surrogate_models/genn surrogate_models/mgp + surrogate_models/sgp Usage diff --git a/doc/_src_docs/surrogate_models.rstx b/doc/_src_docs/surrogate_models.rstx index 43f8f8c82..6e0b640f4 100644 --- a/doc/_src_docs/surrogate_models.rstx +++ b/doc/_src_docs/surrogate_models.rstx @@ -18,6 +18,7 @@ SMT contains the surrogate modeling methods listed below. 
surrogate_models/gekpls surrogate_models/genn surrogate_models/mgp + surrogate_models/sgp Usage diff --git a/doc/_src_docs/surrogate_models/sgp.rst b/doc/_src_docs/surrogate_models/sgp.rst new file mode 100644 index 000000000..07ff82c14 --- /dev/null +++ b/doc/_src_docs/surrogate_models/sgp.rst @@ -0,0 +1,371 @@ +Sparse Gaussian Process (SGP) +============================= + +Although the versatility of Gaussian Process regression models for learning complex data, their computational complexity, +which is :math:`\mathcal{O}(N^3)` with :math:`N` the number of training points, prevent their use to large datasets. +This complexity results from the inversion of the covariance matrix :math:`\mathbf{K}`. We must also highlight that the memory +cost of GPR models is :math:`\mathcal{O}(N^2)`, mainly due to the storage of the covariance matrix itself. + +To address these limitations, sparse GPs approximation methods have emerged as efficient alternatives. +Sparse GPs consider a set of inducing points to approximate the posterior Gaussian distribution with a low-rank representation, +while the variational inference provides a framework for approximating the posterior distribution directly. +Thus, these methods enable accurate modeling of large datasets while preserving computational efficiency +(typically :math:`\mathcal{O}(NM^2)` time and :math:`\mathcal{O}(NM)` memory for some chosen :math:`M, , , ] + - None + - The kernel to use for categorical inputs. Only for non continuous Kriging + * - hierarchical_kernel + - MixHrcKernelType.ALG_KERNEL + - [, ] + - None + - The kernel to use for mixed hierarchical inputs. Only for non continuous Kriging + * - nugget + - 1e-08 + - None + - ['float'] + - a jitter for numerical stability + * - theta0 + - [0.01] + - None + - ['list', 'ndarray'] + - Initial hyperparameters + * - theta_bounds + - [1e-06, 100.0] + - None + - ['list', 'ndarray'] + - bounds for hyperparameters + * - hyper_opt + - Cobyla + - ['Cobyla', 'TNC'] + - ['str'] + - Optimiser for hyperparameters optimisation + * - eval_noise + - True + - [True, False] + - ['bool'] + - Noise is always evaluated + * - noise0 + - [0.01] + - None + - ['list', 'ndarray'] + - Gaussian noise on observed training data + * - noise_bounds + - [2.220446049250313e-14, 10000000000.0] + - None + - ['list', 'ndarray'] + - bounds for noise hyperparameters + * - use_het_noise + - False + - [True, False] + - ['bool'] + - heteroscedastic noise evaluation flag + * - n_start + - 10 + - None + - ['int'] + - number of optimizer runs (multistart method) + * - xlimits + - None + - None + - ['list', 'ndarray'] + - definition of a design space of float (continuous) variables: array-like of size nx x 2 (lower, upper bounds) + * - design_space + - None + - None + - ['BaseDesignSpace', 'list', 'ndarray'] + - definition of the (hierarchical) design space: use `smt.utils.design_space.DesignSpace` as the main API. 
Also accepts list of float variable bounds + * - method + - FITC + - ['FITC', 'VFE'] + - ['str'] + - Method used by sparse GP model + * - n_inducing + - 10 + - None + - ['int'] + - Number of inducing inputs diff --git a/doc/_src_docs/surrogate_models/sgp.rstx b/doc/_src_docs/surrogate_models/sgp.rstx new file mode 100644 index 000000000..03f9c5f28 --- /dev/null +++ b/doc/_src_docs/surrogate_models/sgp.rstx @@ -0,0 +1,76 @@ +Sparse Gaussian Process (SGP) +============================= + +Although the versatility of Gaussian Process regression models for learning complex data, their computational complexity, +which is :math:`\mathcal{O}(N^3)` with :math:`N` the number of training points, prevent their use to large datasets. +This complexity results from the inversion of the covariance matrix :math:`\mathbf{K}`. We must also highlight that the memory +cost of GPR models is :math:`\mathcal{O}(N^2)`, mainly due to the storage of the covariance matrix itself. + +To address these limitations, sparse GPs approximation methods have emerged as efficient alternatives. +Sparse GPs consider a set of inducing points to approximate the posterior Gaussian distribution with a low-rank representation, +while the variational inference provides a framework for approximating the posterior distribution directly. +Thus, these methods enable accurate modeling of large datasets while preserving computational efficiency +(typically :math:`\mathcal{O}(NM^2)` time and :math:`\mathcal{O}(NM)` memory for some chosen :math:`M 0: F_rho = self._regression_types[self.options["rho_regr"]](self.X_norma) self.q_all[lvl] = F_rho.shape[1] - self.F_all[lvl] = np.hstack( - ( - F_rho - * np.dot( - self._predict_intermediate_values( - self.X_norma, lvl, descale=False + + if self.is_continuous: + self.F_all[lvl] = np.hstack( + ( + F_rho + * np.dot( + self._predict_intermediate_values( + self.X_norma, lvl, descale=False + ), + np.ones((1, self.q_all[lvl])), ), - np.ones((1, self.q_all[lvl])), - ), - self.F_all[lvl], + self.F_all[lvl], + ) ) - ) + else: + self.F_all[lvl] = np.hstack( + ( + F_rho + * np.dot( + self._predict_intermediate_values( + self.X[lvl], lvl, descale=False + ), + np.ones((1, self.q_all[lvl])), + ), + self.F_all[lvl], + ) + ) + else: self.q_all[lvl] = 0 @@ -393,18 +466,65 @@ def _predict_intermediate_values(self, X, lvl, descale=True): # Calculate kriging mean and variance at level 0 mu = np.zeros((n_eval, lvl)) - if descale: + + if not (self.is_continuous): + X_usc = X + if descale and self.is_continuous: X = (X - self.X_offset) / self.X_scale + f = self._regression_types[self.options["poly"]](X) f0 = self._regression_types[self.options["poly"]](X) - dx = self._differences(X, Y=self.X_norma_all[0]) - d = self._componentwise_distance(dx) - beta = self.optimal_par[0]["beta"] - r_ = self._correlation_types[self.options["corr"]]( - self.optimal_theta[0], d - ).reshape(n_eval, self.nt_all[0]) + if self.is_continuous: + dx = self._differences(X, Y=self.X_norma_all[0]) + d = self._componentwise_distance(dx) + r_ = self._correlation_types[self.options["corr"]]( + self.optimal_theta[0], d + ).reshape(n_eval, self.nt_all[0]) + else: + _, x_is_acting = self.design_space.correct_get_acting(X_usc) + _, y_is_acting = self.design_space.correct_get_acting(self.X[0]) + dx = gower_componentwise_distances( + X=X_usc, + x_is_acting=x_is_acting, + design_space=self.design_space, + hierarchical_kernel=self.options["hierarchical_kernel"], + y=np.copy(self.X[0]), + y_is_acting=y_is_acting, + ) + d = componentwise_distance( + dx, + 
self.options["corr"], + self.nx, + power=self.options["pow_exp_power"], + theta=None, + return_derivative=False, + ) + _, ij = cross_distances(X_usc, self.X[0]) + Lij, _ = cross_levels( + X=X_usc, ij=ij, design_space=self.design_space, y=self.X[0] + ) + self.ij = ij + if self.options["categorical_kernel"] == MixIntKernelType.CONT_RELAX: + Xpred, _ = self.design_space.unfold_x(X_usc) + Xpred_norma = (Xpred - self.X2_offset) / self.X2_scale + # Get pairwise componentwise L1-distances to the input training set + dx = differences(Xpred_norma, Y=self.X2_norma[str(0)].copy()) + r_ = self._matrix_data_corr( + corr=self.options["corr"], + design_space=self.design_space, + power=self.options["pow_exp_power"], + theta=self.optimal_theta[0], + theta_bounds=self.options["theta_bounds"], + dx=dx, + Lij=Lij, + n_levels=self.n_levels, + cat_features=self.cat_features, + cat_kernel=self.options["categorical_kernel"], + x=X_usc, + ).reshape(n_eval, self.nt_all[0]) + gamma = self.optimal_par[0]["gamma"] # Scaled predictor @@ -413,11 +533,56 @@ def _predict_intermediate_values(self, X, lvl, descale=True): # Calculate recursively kriging mean and variance at level i for i in range(1, lvl): g = self._regression_types[self.options["rho_regr"]](X) - dx = self._differences(X, Y=self.X_norma_all[i]) - d = self._componentwise_distance(dx) - r_ = self._correlation_types[self.options["corr"]]( - self.optimal_theta[i], d - ).reshape(n_eval, self.nt_all[i]) + + if self.is_continuous: + dx = self._differences(X, Y=self.X_norma_all[i]) + d = self._componentwise_distance(dx) + r_ = self._correlation_types[self.options["corr"]]( + self.optimal_theta[i], d + ).reshape(n_eval, self.nt_all[i]) + else: + _, x_is_acting = self.design_space.correct_get_acting(X_usc) + _, y_is_acting = self.design_space.correct_get_acting(self.X[i]) + dx = gower_componentwise_distances( + X_usc, + x_is_acting=x_is_acting, + design_space=self.design_space, + hierarchical_kernel=self.options["hierarchical_kernel"], + y=np.copy(self.X[i]), + y_is_acting=y_is_acting, + ) + d = componentwise_distance( + dx, + self.options["corr"], + self.nx, + power=self.options["pow_exp_power"], + theta=None, + return_derivative=False, + ) + _, ij = cross_distances(X_usc, self.X[i]) + Lij, _ = cross_levels( + X=X_usc, ij=ij, design_space=self.design_space, y=self.X[i] + ) + self.ij = ij + if self.options["categorical_kernel"] == MixIntKernelType.CONT_RELAX: + Xpred, _ = self.design_space.unfold_x(X_usc) + Xpred_norma = (Xpred - self.X2_offset) / self.X2_scale + # Get pairwise componentwise L1-distances to the input training set + dx = differences(Xpred_norma, Y=self.X2_norma[str(i)].copy()) + r_ = self._matrix_data_corr( + corr=self.options["corr"], + design_space=self.design_space, + power=self.options["pow_exp_power"], + theta=self.optimal_theta[i], + theta_bounds=self.options["theta_bounds"], + dx=dx, + Lij=Lij, + n_levels=self.n_levels, + cat_features=self.cat_features, + cat_kernel=self.options["categorical_kernel"], + x=X_usc, + ).reshape(n_eval, self.nt_all[i]) + f = np.vstack((g.T * mu[:, i - 1], f0.T)) beta = self.optimal_par[i]["beta"] gamma = self.optimal_par[i]["gamma"] @@ -463,7 +628,8 @@ def _predict_variances(self, X: np.ndarray, is_acting=None) -> np.ndarray: """ return self.predict_variances_all_levels(X)[0][:, -1] - def predict_variances_all_levels(self, X): + def predict_variances_all_levels(self, X, is_acting=None): + # def predict_variances_all_levels(self, X): """ Evaluates the model at a set of points. 
@@ -483,14 +649,14 @@ def predict_variances_all_levels(self, X): n_eval, n_features_X = X.shape # if n_features_X != self.n_features: # raise ValueError("Design must be an array of n_features columns.") + X = (X - self.X_offset) / self.X_scale # Calculate kriging mean and variance at level 0 mu = np.zeros((n_eval, nlevel)) + f = self._regression_types[self.options["poly"]](X) f0 = self._regression_types[self.options["poly"]](X) - dx = self._differences(X, Y=self.X_norma_all[0]) - d = self._componentwise_distance(dx) # Get regression function and correlation F = self.F_all[0] @@ -498,10 +664,56 @@ def predict_variances_all_levels(self, X): beta = self.optimal_par[0]["beta"] Ft = solve_triangular(C, F, lower=True) - # yt = solve_triangular(C, self.y_norma_all[0], lower=True) - r_ = self._correlation_types[self.options["corr"]]( - self.optimal_theta[0], d - ).reshape(n_eval, self.nt_all[0]) + + if self.is_continuous: + dx = self._differences(X, Y=self.X_norma_all[0]) + d = self._componentwise_distance(dx) + r_ = self._correlation_types[self.options["corr"]]( + self.optimal_theta[0], d + ).reshape(n_eval, self.nt_all[0]) + else: + _, y_is_acting = self.design_space.correct_get_acting(self.X[0]) + _, x_is_acting = self.design_space.correct_get_acting(X) + dx = gower_componentwise_distances( + X, + x_is_acting=x_is_acting, + design_space=self.design_space, + hierarchical_kernel=self.options["hierarchical_kernel"], + y=np.copy(self.X[0]), + y_is_acting=y_is_acting, + ) + d = componentwise_distance( + dx, + self.options["corr"], + self.nx, + power=self.options["pow_exp_power"], + theta=None, + return_derivative=False, + ) + _, ij = cross_distances(X, self.X[0]) + Lij, _ = cross_levels( + X=X, ij=ij, design_space=self.design_space, y=self.X[0] + ) + self.ij = ij + if self.options["categorical_kernel"] == MixIntKernelType.CONT_RELAX: + Xpred, _ = self.design_space.unfold_x(X) + Xpred_norma = (Xpred - self.X2_offset) / self.X2_scale + # Get pairwise componentwise L1-distances to the input training set + dx = differences(Xpred_norma, Y=self.X2_norma[str(0)].copy()) + r_ = self._matrix_data_corr( + corr=self.options["corr"], + design_space=self.design_space, + power=self.options["pow_exp_power"], + theta=self.optimal_theta[0], + theta_bounds=self.options["theta_bounds"], + dx=dx, + Lij=Lij, + n_levels=self.n_levels, + cat_features=self.cat_features, + cat_kernel=self.options["categorical_kernel"], + x=X, + ).reshape(n_eval, self.nt_all[0]) + gamma = self.optimal_par[0]["gamma"] # Scaled predictor @@ -525,12 +737,58 @@ def predict_variances_all_levels(self, X): for i in range(1, nlevel): F = self.F_all[i] C = self.optimal_par[i]["C"] + g = self._regression_types[self.options["rho_regr"]](X) - dx = self._differences(X, Y=self.X_norma_all[i]) - d = self._componentwise_distance(dx) - r_ = self._correlation_types[self.options["corr"]]( - self.optimal_theta[i], d - ).reshape(n_eval, self.nt_all[i]) + + if self.is_continuous: + dx = self._differences(X, Y=self.X_norma_all[i]) + d = self._componentwise_distance(dx) + r_ = self._correlation_types[self.options["corr"]]( + self.optimal_theta[i], d + ).reshape(n_eval, self.nt_all[i]) + else: + _, y_is_acting = self.design_space.correct_get_acting(self.X[i]) + _, x_is_acting = self.design_space.correct_get_acting(X) + dx = gower_componentwise_distances( + X, + x_is_acting=x_is_acting, + design_space=self.design_space, + hierarchical_kernel=self.options["hierarchical_kernel"], + y=np.copy(self.X[i]), + y_is_acting=y_is_acting, + ) + d = componentwise_distance( + dx, 
+ self.options["corr"], + self.nx, + power=self.options["pow_exp_power"], + theta=None, + return_derivative=False, + ) + _, ij = cross_distances(X, self.X[i]) + Lij, _ = cross_levels( + X=X, ij=ij, design_space=self.design_space, y=self.X[i] + ) + self.ij = ij + if self.options["categorical_kernel"] == MixIntKernelType.CONT_RELAX: + Xpred, _ = self.design_space.unfold_x(X) + Xpred_norma = (Xpred - self.X2_offset) / self.X2_scale + # Get pairwise componentwise L1-distances to the input training set + dx = differences(Xpred_norma, Y=self.X2_norma[str(i)].copy()) + r_ = self._matrix_data_corr( + corr=self.options["corr"], + design_space=self.design_space, + power=self.options["pow_exp_power"], + theta=self.optimal_theta[i], + theta_bounds=self.options["theta_bounds"], + dx=dx, + Lij=Lij, + n_levels=self.n_levels, + cat_features=self.cat_features, + cat_kernel=self.options["categorical_kernel"], + x=X, + ).reshape(n_eval, self.nt_all[i]) + f = np.vstack((g.T * mu[:, i - 1], f0.T)) Ft = solve_triangular(C, F, lower=True) @@ -695,25 +953,48 @@ def _check_param(self): "MFKPLSK only works with a squared exponential kernel (until we prove the contrary)" ) + n_param = d + + if self.options["categorical_kernel"] is not None: + if self.options["categorical_kernel"] == MixIntKernelType.CONT_RELAX: + n_comp = self.options["n_comp"] if "n_comp" in self.options else None + mat_dim = ( + self.options["cat_kernel_comps"] + if "cat_kernel_comps" in self.options + else None + ) + + n_param = compute_n_param( + self.design_space, + self.options["categorical_kernel"], + d, + n_comp, + mat_dim, + ) + else: + raise ValueError( + "Only the continuous relaxation kernel is available with multi-fidelity" + ) + if isinstance(self.options["theta0"], np.ndarray): - if self.options["theta0"].shape != (self.nlvl, d): + if self.options["theta0"].shape != (self.nlvl, n_param): raise ValueError( "the dimensions of theta0 %s should coincide to the number of dim %s" - % (self.options["theta0"].shape, (self.nlvl, d)) + % (self.options["theta0"].shape, (self.nlvl, n_param)) ) else: - if len(self.options["theta0"]) != d: + if len(self.options["theta0"]) != n_param: if len(self.options["theta0"]) == 1: - self.options["theta0"] *= np.ones((self.nlvl, d)) + self.options["theta0"] *= np.ones((self.nlvl, n_param)) elif len(self.options["theta0"]) == self.nlvl: self.options["theta0"] = np.array(self.options["theta0"]).reshape( -1, 1 ) - self.options["theta0"] *= np.ones((1, d)) + self.options["theta0"] *= np.ones((1, n_param)) else: raise ValueError( "the length of theta0 (%s) should be equal to the number of dim (%s) or levels of fidelity (%s)." 
- % (len(self.options["theta0"]), d, self.nlvl) + % (len(self.options["theta0"]), n_param, self.nlvl) ) else: self.options["theta0"] *= np.ones((self.nlvl, 1)) diff --git a/smt/applications/mixed_integer.py b/smt/applications/mixed_integer.py index eafb16e9e..e9076c274 100644 --- a/smt/applications/mixed_integer.py +++ b/smt/applications/mixed_integer.py @@ -134,7 +134,7 @@ def set_training_values(self, xt, yt, name=None) -> None: else: xt_apply = xt - super().set_training_values(xt_apply, yt) + super().set_training_values(xt_apply, yt, name) self._surrogate.set_training_values(xt_apply, yt, name) def update_training_values(self, yt, name=None): @@ -148,10 +148,18 @@ def predict_values(self, x: np.ndarray) -> np.ndarray: x_corr, is_acting = self._get_x_for_surrogate_model(x) return self._surrogate.predict_values(x_corr) + def _predict_intermediate_values(self, x: np.ndarray, lvl) -> np.ndarray: + x_corr, is_acting = self._get_x_for_surrogate_model(x) + return self._surrogate._predict_intermediate_values(x_corr, lvl) + def predict_variances(self, x: np.ndarray) -> np.ndarray: x_corr, is_acting = self._get_x_for_surrogate_model(x) return self._surrogate.predict_variances(x_corr) + def predict_variances_all_levels(self, x: np.ndarray) -> np.ndarray: + x_corr, is_acting = self._get_x_for_surrogate_model(x) + return self._surrogate.predict_variances_all_levels(x_corr) + def _get_x_for_surrogate_model(self, x): xp = ensure_2d_array(x, "xp") @@ -238,7 +246,7 @@ def set_training_values(self, xt, yt, name=None, is_acting=None): else: xt_apply, is_acting_apply = xt, is_acting - super().set_training_values(xt_apply, yt, is_acting=is_acting_apply) + super().set_training_values(xt_apply, yt, name, is_acting=is_acting_apply) self._surrogate.set_training_values( xt_apply, yt, name, is_acting=is_acting_apply ) @@ -254,10 +262,22 @@ def predict_values(self, x: np.ndarray, is_acting=None) -> np.ndarray: x_corr, is_acting = self._get_x_for_surrogate_model(x) return self._surrogate.predict_values(x_corr, is_acting=is_acting) + def _predict_intermediate_values( + self, x: np.ndarray, lvl, is_acting=None + ) -> np.ndarray: + x_corr, is_acting = self._get_x_for_surrogate_model(x) + return self._surrogate._predict_intermediate_values( + x_corr, lvl, is_acting=is_acting + ) + def predict_variances(self, x: np.ndarray, is_acting=None) -> np.ndarray: x_corr, is_acting = self._get_x_for_surrogate_model(x) return self._surrogate.predict_variances(x_corr, is_acting=is_acting) + def predict_variances_all_levels(self, x: np.ndarray, is_acting=None) -> np.ndarray: + x_corr, is_acting = self._get_x_for_surrogate_model(x) + return self._surrogate.predict_variances_all_levels(x_corr, is_acting=is_acting) + def _get_x_for_surrogate_model(self, x): xp = ensure_2d_array(x, "xp") diff --git a/smt/applications/tests/test_ego.py b/smt/applications/tests/test_ego.py index 99924b100..b0af816df 100644 --- a/smt/applications/tests/test_ego.py +++ b/smt/applications/tests/test_ego.py @@ -854,7 +854,7 @@ def _evaluate(self, x, kx): assert kx is None response = self.super._evaluate(x, kx) sens = np.hstack( - self.super._evaluate(x, ki) for ki in range(x.shape[1]) + [self.super._evaluate(x, ki) for ki in range(x.shape[1])] ) return np.hstack((response, sens)) @@ -1204,9 +1204,7 @@ def function_test_mixed_integer(X): ) mixint = MixedIntegerContext(design_space) n_doe = 3 - sampling = mixint.build_sampling_method( - LHS, criterion="ese", random_state=random_state - ) + sampling = mixint.build_sampling_method(random_state=random_state) xdoe 
= sampling(n_doe) ydoe = function_test_mixed_integer(xdoe) diff --git a/smt/applications/tests/test_mfk_mfkpls_mixed.py b/smt/applications/tests/test_mfk_mfkpls_mixed.py new file mode 100644 index 000000000..ccbb5587f --- /dev/null +++ b/smt/applications/tests/test_mfk_mfkpls_mixed.py @@ -0,0 +1,990 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Oct 6 14:47:08 2023 + +@author: rcharayr +""" + +import unittest +import numpy as np +import matplotlib +import itertools + +matplotlib.use("Agg") + + +import numpy.linalg as npl +import matplotlib.pyplot as plt + +from smt.applications.mfkpls import MFKPLS +from smt.applications.mfk import MFK + +from smt.applications.mixed_integer import ( + MixedIntegerContext, + MixedIntegerSamplingMethod, + MixedIntegerKrigingModel, + MixedIntegerSurrogateModel, +) + +from smt.sampling_methods import LHS + +from smt.surrogate_models import ( + KRG, + KPLS, + QP, + MixIntKernelType, + MixHrcKernelType, +) + +import warnings + +warnings.filterwarnings("ignore") + +from smt.utils.design_space import ( + DesignSpace, + FloatVariable, + IntegerVariable, + OrdinalVariable, + CategoricalVariable, +) + + +class TestMFKmixed(unittest.TestCase): + # useful functions (g and h) + def g_ZDT(self, x): + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + out = 1 + (9 / (6 - 1)) * (x1 + x2 + x3 + x4 + x5) + return out + + def g_DTLZ5(self, x): + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + out = ( + (x1 - 0.5) ** 2 + + (x2 - 0.5) ** 2 + + (x3 - 0.5) ** 2 + + (x4 - 0.5) ** 2 + + (x5 - 0.5) ** 2 + ) + return out + + def h_ZDT1(self, x): + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + out = 1 - np.sqrt(x0 / self.g_ZDT(x)) + return out + + def h_ZDT2(self, x): + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + out = 1 - (x0 / self.g_ZDT(x)) ** 2 + return out + + def h_ZDT3(self, x): + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + out = ( + 1 + - np.sqrt(x0 / self.g_ZDT(x)) + - (x0 / self.g_ZDT(x)) * np.sin(10 * np.pi * x0) + ) + return out + + # grouped functions + def ZDT1_HF_mixt(self, x): + fail = True + + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + y1 = x0 + y2 = self.g_ZDT(x) * self.h_ZDT1(x) + + fail = False + return [y1, y2], fail + + def ZDT1_LF_mixt(self, x): + fail = True + + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + y1 = 0.9 * x0 + 0.1 + y2 = (0.8 * self.g_ZDT(x) - 0.2) * (1.2 * self.h_ZDT1(x) + 0.2) + + fail = False + return [y1, y2], fail + + ############################################################################### + ############################################################################### + 
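The mapping of the categorical level x[5] to a numeric value (0 -> 0.1, 1 -> 0.5, 2 -> 0.75) is repeated verbatim in every ZDT/DTLZ helper of this test file. A small hypothetical helper (not part of the patch, names illustrative) would remove that duplication:

# Hypothetical refactor, not part of this patch: centralize the x[5] -> float decoding
_CAT_TO_FLOAT = {0: 0.1, 1: 0.5, 2: 0.75}  # 0 -> blue, 1 -> red, 2 -> green

def _decode_cat(x):
    """Return x0..x4 unchanged with the categorical level x[5] replaced by its numeric value."""
    return (*x[:5], _CAT_TO_FLOAT[int(x[5])])

so that, for instance, g_ZDT reduces to 1 + (9 / (6 - 1)) * sum(_decode_cat(x)[1:]).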
############################################################################### + + def ZDT2_HF_mixt(self, x): + fail = True + + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + y1 = x0 + y2 = self.g_ZDT(x) * self.h_ZDT2(x) + + fail = False + return [y1, y2], fail + + def ZDT2_LF_mixt(self, x): + fail = True + + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + y1 = 0.8 * x0 + 0.2 + y2 = (0.9 * self.g_ZDT(x) + 0.2) * (1.1 * self.h_ZDT2(x) - 0.2) + + fail = False + return [y1, y2], fail + + ############################################################################### + ############################################################################### + ############################################################################### + + def ZDT3_HF_mixt(self, x): + fail = True + + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + y1 = x0 + y2 = self.g_ZDT(x) * self.h_ZDT3(x) + + fail = False + return [y1, y2], fail + + def ZDT3_LF_mixt(self, x): + fail = True + + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + y1 = 0.75 * x0 + 0.25 + y2 = self.g_ZDT(x) * (1.25 * self.h_ZDT3(x) - 0.25) + + fail = False + return [y1, y2], fail + + ############################################################################### + ############################################################################### + ############################################################################### + + def DTLZ5_HF_mixt(self, x): + fail = True + + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + y1 = (1 + self.g_DTLZ5(x)) * np.cos(0.5 * np.pi * x0) + y2 = (1 + self.g_DTLZ5(x)) * np.sin(0.5 * np.pi * x0) + + fail = False + return [y1, y2], fail + + def DTLZ5_LF_mixt(self, x): + fail = True + + x0 = x[0] + x1 = x[1] + x2 = x[2] + x3 = x[3] + x4 = x[4] + if x[5] == 0: # 0 -> blue + x5 = 0.1 + if x[5] == 1: # 1 -> red + x5 = 0.5 + if x[5] == 2: # 2 -> green + x5 = 0.75 + + y1 = (1 + 0.8 * self.g_DTLZ5(x)) * np.cos(0.5 * np.pi * x0) + y2 = (1 + 1.1 * self.g_DTLZ5(x)) * np.sin(0.5 * np.pi * x0) + + fail = False + return [y1, y2], fail + + ############################################################################### + ############################################################################### + ############################################################################### + + def run_mfk_mixed_example(self): + # KRG_METHODS = ["krg", "kpls", "mfk", "mfkpls"] + # KRG_METHODS = ["krg"] + # KRG_METHODS = ["kpls"] + KRG_METHODS = ["mfk"] + # KRG_METHODS = ["mfkpls"] + + pbm = "ZDT1" + + if pbm == "ZDT1": + HF_fun = self.ZDT1_HF_mixt + LF_fun = self.ZDT1_LF_mixt + elif pbm == "ZDT2": + HF_fun = self.ZDT2_HF_mixt + LF_fun = self.ZDT2_LF_mixt + elif pbm == "ZDT3": + HF_fun = self.ZDT3_HF_mixt + LF_fun = self.ZDT3_LF_mixt + elif pbm == "DTLZ5": + HF_fun = self.DTLZ5_HF_mixt + LF_fun = self.DTLZ5_LF_mixt + + # ------------------------------------------------------------------------------ + + 
# Instantiate the design space with all its design variables: + ds = DesignSpace( + [ + FloatVariable(0, 1), # x0 continuous between 0 and 1 + FloatVariable(0, 1), # x0 continuous between 0 and 1 + FloatVariable(0, 1), # x0 continuous between 0 and 1 + FloatVariable(0, 1), # x0 continuous between 0 and 1 + # OrdinalVariable(['0', '1']), # x4 ordinal: 0 and 1; order is relevant + IntegerVariable(0, 1), # x4 integer between 0 and 1 + CategoricalVariable( + ["blue", "red", "green"] + ), # x5 categorical: blue, red or green; order is not relevant + ] + ) + + # ------------------------------------------------------------------------------ + + # Validation data: + n_valid = 100 # validation set size + x_valid, is_acting_valid = ds.sample_valid_x(n_valid) + + y1_valid = np.zeros(n_valid) # obj 1 + y2_valid = np.zeros(n_valid) # obj 2 + y1_valid_LF = np.zeros(n_valid) # obj 1 + y2_valid_LF = np.zeros(n_valid) # obj 2 + for i in range(n_valid): + res = HF_fun(x_valid[i, :]) + y1_valid[i] = res[0][0] # obj 1 + y2_valid[i] = res[0][1] # obj 2 + res_LF = LF_fun(x_valid[i, :]) + y1_valid_LF[i] = res_LF[0][0] # obj 1 + y2_valid_LF[i] = res_LF[0][1] # obj 2 + # ------------------------------------------------------------------------------ + + # HF training data: + n_train_HF = 20 # training set size + xt_HF, is_acting_t_HF = ds.sample_valid_x(n_train_HF) + + y1t_HF = np.zeros(n_train_HF) # obj 1 + y2t_HF = np.zeros(n_train_HF) # obj 2 + for i in range(n_train_HF): + res = HF_fun(xt_HF[i, :]) + y1t_HF[i] = res[0][0] # obj 1 + y2t_HF[i] = res[0][1] # obj 2 + + # ------------------------------------------------------------------------------ + + for krg_method in KRG_METHODS: + if "mfk" in krg_method: # if multifi compute LF validation data + n_train_LF = 40 # training set size IF MULTIFI + sampling = MixedIntegerSamplingMethod(LHS, ds, criterion="ese") + xt_LF = np.concatenate( + (xt_HF, sampling.expand_lhs(xt_HF, n_train_LF - n_train_HF)), axis=0 + ) + + y1t_LF = np.zeros(n_train_LF) # obj 1 + y2t_LF = np.zeros(n_train_LF) # obj 2 + for i in range(n_train_LF): + res_LF = LF_fun(xt_LF[i, :]) + y1t_LF[i] = res_LF[0][0] # obj 1 + y2t_LF[i] = res_LF[0][1] # obj 2 + + if krg_method == "krg": + print("KRG") + ########################### + # Mono-fidelity KRG + ########################### + + sm1 = KRG( + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + sm2 = KRG( + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + + # Set training data + # obj 1 + sm1.set_training_values(xt_HF, y1t_HF) + # obj 2 + sm2.set_training_values(xt_HF, y2t_HF) + + if krg_method == "kpls": + print("KPLS") + ########################### + # Mono-fidelity KPLS + ########################### + + sm1 = KPLS( + n_comp=3, + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + sm2 = KPLS( + n_comp=3, + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + + # Set training data + # obj 1 + sm1.set_training_values(xt_HF, y1t_HF) + # obj 2 + sm2.set_training_values(xt_HF, y2t_HF) + + if krg_method == "mfk": + print("MFK") + ########################### + # MFK + ########################### + + sm1 = MFK( + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + 
categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + sm2 = MFK( + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + + # Set training data + # obj 1 + sm1.set_training_values(xt_LF[:, :], y1t_LF, name=0) + sm1.set_training_values(xt_HF[:, :], y1t_HF) + # obj 2 + sm2.set_training_values(xt_LF[:, :], y2t_LF, name=0) + sm2.set_training_values(xt_HF[:, :], y2t_HF) + + if krg_method == "mfkpls": + print("MFKPLS") + ########################### + # MFKKPLS + ########################## + + sm1 = MFKPLS( + n_comp=3, + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + sm2 = MFKPLS( + n_comp=3, + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + + # Set training data + # obj 1 + sm1.set_training_values(xt_LF[:, :], y1t_LF, name=0) + sm1.set_training_values(xt_HF[:, :], y1t_HF) + # obj 2 + sm2.set_training_values(xt_LF[:, :], y2t_LF, name=0) + sm2.set_training_values(xt_HF[:, :], y2t_HF) + + # Train the models: + import time + + t0 = time.time() + sm1.train() + t1 = time.time() + sm2.train() + t2 = time.time() + print("t1-t0=", t1 - t0) + print("t2-t1=", t2 - t1) + + # Compute errors: + # obj 1 + y1_predict = np.ravel(sm1.predict_values(x_valid)) + if "mfk" in krg_method: + y1_predict_LF = np.ravel(sm1._predict_intermediate_values(x_valid, 1)) + y1_var_predict_LF = np.ravel(sm1.predict_variances_all_levels(x_valid)) + y1_var_predict = np.ravel(sm1.predict_variances(x_valid)) + y1_sd_predict = np.sqrt(y1_var_predict) + # obj 2 + y2_predict = np.ravel(sm2.predict_values(x_valid)) + if "mfk" in krg_method: + y2_predict_LF = np.ravel(sm2._predict_intermediate_values(x_valid, 1)) + y2_var_predict_LF = np.ravel(sm2.predict_variances_all_levels(x_valid)) + y2_var_predict = np.ravel(sm2.predict_variances(x_valid)) + y2_sd_predict = np.sqrt(y2_var_predict) + + # obj 1: + # print("y1_predict - y1_valid =", y1_predict - y1_valid) + print("norme2(y1_predict - y1_valid) =", npl.norm(y1_predict - y1_valid, 2)) + if "mfk" in krg_method: + print( + "norme2(y1_predict_LF - y1_valid_LF) =", + npl.norm(y1_predict_LF - y1_valid_LF, 2), + ) + # print("y1_sd_predict =", y1_sd_predict) + + # obj 2: + # print("y2_predict - y2_valid =", y2_predict - y2_valid) + print("norme2(y2_predict - y2_valid) =", npl.norm(y2_predict - y2_valid, 2)) + if "mfk" in krg_method: + print( + "norme2(y2_predict_LF - y2_valid_LF) =", + npl.norm(y2_predict_LF - y2_valid_LF, 2), + ) + # print("y2_sd_predict =", y2_sd_predict) + + # PLOTS: + plt.figure() + plt.title("y1") + plt.xlabel("y1_valid") + plt.ylabel("y1_predict") + plt.scatter(y1_valid, y1_predict) + plt.plot(np.linspace(0, 1.5, 100), np.linspace(0, 1.5, 100), color="red") + plt.show() + + plt.figure() + plt.title("y2") + plt.xlabel("y2_valid") + plt.ylabel("y2_predict") + plt.scatter(y2_valid, y2_predict) + plt.plot(np.linspace(0, 9, 100), np.linspace(0, 9, 100), color="red") + plt.show() + + if "mfk" in krg_method: + plt.figure() + plt.title("y1") + plt.xlabel("y1_valid_LF") + plt.ylabel("y1_predict_LF") + plt.scatter(y1_valid_LF, y1_predict_LF) + plt.plot( + np.linspace(0, 1.5, 100), np.linspace(0, 1.5, 100), color="red" + ) + plt.show() + + plt.figure() + plt.title("y2") + plt.xlabel("y2_valid_LF") + plt.ylabel("y2_predict_LF") + plt.scatter(y2_valid_LF, y2_predict_LF) + plt.plot(np.linspace(0, 9, 100), np.linspace(0, 9, 100), 
color="red") + plt.show() + + # ------------------------------------------------------------------------------ + + # PLOTS tests: + plt.figure() + plt.title("y1") + plt.xlabel("y1t_HF") + plt.ylabel("np.ravel(sm1.predict_values(xt_HF))") + plt.scatter(y1t_HF, np.ravel(sm1.predict_values(xt_HF))) + plt.plot(np.linspace(0, 1.5, 100), np.linspace(0, 1.5, 100), color="red") + plt.show() + + plt.figure() + plt.title("y2") + plt.xlabel("y2t_HF") + plt.ylabel("np.ravel(sm2.predict_values(xt_HF))") + plt.scatter(y2t_HF, np.ravel(sm2.predict_values(xt_HF))) + plt.plot(np.linspace(0, 9, 100), np.linspace(0, 9, 100), color="red") + plt.show() + + if "mfk" in krg_method: + plt.figure() + plt.title("y1") + plt.xlabel("y1t_LF") + plt.ylabel("np.ravel(sm1._predict_intermediate_values(xt_LF,1))") + plt.scatter( + y1t_LF, np.ravel(sm1._predict_intermediate_values(xt_LF, 1)) + ) + plt.plot( + np.linspace(0, 1.5, 100), np.linspace(0, 1.5, 100), color="red" + ) + plt.show() + + plt.figure() + plt.title("y2") + plt.xlabel("y2t_LF") + plt.ylabel("np.ravel(sm2._predict_intermediate_values(xt_LF,1))") + plt.scatter( + y2t_LF, np.ravel(sm2._predict_intermediate_values(xt_LF, 1)) + ) + plt.plot(np.linspace(0, 9, 100), np.linspace(0, 9, 100), color="red") + plt.show() + # ------------------------------------------------------------------------------ + + def run_mfkpls_mixed_example(self): + # KRG_METHODS = ["krg", "kpls", "mfk", "mfkpls"] + # KRG_METHODS = ["krg"] + # KRG_METHODS = ["kpls"] + # KRG_METHODS = ["mfk"] + KRG_METHODS = ["mfkpls"] + + pbm = "ZDT1" + + if pbm == "ZDT1": + HF_fun = self.ZDT1_HF_mixt + LF_fun = self.ZDT1_LF_mixt + elif pbm == "ZDT2": + HF_fun = self.ZDT2_HF_mixt + LF_fun = self.ZDT2_LF_mixt + elif pbm == "ZDT3": + HF_fun = self.ZDT3_HF_mixt + LF_fun = self.ZDT3_LF_mixt + elif pbm == "DTLZ5": + HF_fun = self.DTLZ5_HF_mixt + LF_fun = self.DTLZ5_LF_mixt + + # ------------------------------------------------------------------------------ + + # Instantiate the design space with all its design variables: + ds = DesignSpace( + [ + FloatVariable(0, 1), # x0 continuous between 0 and 1 + FloatVariable(0, 1), # x0 continuous between 0 and 1 + FloatVariable(0, 1), # x0 continuous between 0 and 1 + FloatVariable(0, 1), # x0 continuous between 0 and 1 + # OrdinalVariable(['0', '1']), # x4 ordinal: 0 and 1; order is relevant + IntegerVariable(0, 1), # x4 integer between 0 and 1 + CategoricalVariable( + ["blue", "red", "green"] + ), # x5 categorical: blue, red or green; order is not relevant + ] + ) + + # ------------------------------------------------------------------------------ + + # Validation data: + n_valid = 100 # validation set size + x_valid, is_acting_valid = ds.sample_valid_x(n_valid) + + y1_valid = np.zeros(n_valid) # obj 1 + y2_valid = np.zeros(n_valid) # obj 2 + y1_valid_LF = np.zeros(n_valid) # obj 1 + y2_valid_LF = np.zeros(n_valid) # obj 2 + for i in range(n_valid): + res = HF_fun(x_valid[i, :]) + y1_valid[i] = res[0][0] # obj 1 + y2_valid[i] = res[0][1] # obj 2 + res_LF = LF_fun(x_valid[i, :]) + y1_valid_LF[i] = res_LF[0][0] # obj 1 + y2_valid_LF[i] = res_LF[0][1] # obj 2 + # ------------------------------------------------------------------------------ + + # HF training data: + n_train_HF = 20 # training set size + xt_HF, is_acting_t_HF = ds.sample_valid_x(n_train_HF) + + y1t_HF = np.zeros(n_train_HF) # obj 1 + y2t_HF = np.zeros(n_train_HF) # obj 2 + for i in range(n_train_HF): + res = HF_fun(xt_HF[i, :]) + y1t_HF[i] = res[0][0] # obj 1 + y2t_HF[i] = res[0][1] # obj 2 + + # 
------------------------------------------------------------------------------ + + for krg_method in KRG_METHODS: + if "mfk" in krg_method: # if multifi compute LF validation data + n_train_LF = 40 # training set size IF MULTIFI + sampling = MixedIntegerSamplingMethod(LHS, ds, criterion="ese") + xt_LF = np.concatenate( + (xt_HF, sampling.expand_lhs(xt_HF, n_train_LF - n_train_HF)), axis=0 + ) + + y1t_LF = np.zeros(n_train_LF) # obj 1 + y2t_LF = np.zeros(n_train_LF) # obj 2 + for i in range(n_train_LF): + res_LF = LF_fun(xt_LF[i, :]) + y1t_LF[i] = res_LF[0][0] # obj 1 + y2t_LF[i] = res_LF[0][1] # obj 2 + + if krg_method == "krg": + print("KRG") + ########################### + # Mono-fidelity KRG + ########################### + + sm1 = KRG( + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + sm2 = KRG( + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + + # Set training data + # obj 1 + sm1.set_training_values(xt_HF, y1t_HF) + # obj 2 + sm2.set_training_values(xt_HF, y2t_HF) + + if krg_method == "kpls": + print("KPLS") + ########################### + # Mono-fidelity KPLS + ########################### + + sm1 = KPLS( + n_comp=3, + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + sm2 = KPLS( + n_comp=3, + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + + # Set training data + # obj 1 + sm1.set_training_values(xt_HF, y1t_HF) + # obj 2 + sm2.set_training_values(xt_HF, y2t_HF) + + if krg_method == "mfk": + print("MFK") + ########################### + # MFK + ########################### + + sm1 = MFK( + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + sm2 = MFK( + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + + # Set training data + # obj 1 + sm1.set_training_values(xt_LF[:, :], y1t_LF, name=0) + sm1.set_training_values(xt_HF[:, :], y1t_HF) + # obj 2 + sm2.set_training_values(xt_LF[:, :], y2t_LF, name=0) + sm2.set_training_values(xt_HF[:, :], y2t_HF) + + if krg_method == "mfkpls": + print("MFKPLS") + ########################### + # MFKKPLS + ########################## + + sm1 = MFKPLS( + n_comp=3, + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + sm2 = MFKPLS( + n_comp=3, + design_space=ds, + theta0=[1e-2], + print_prediction=False, + corr="squar_exp", + categorical_kernel=MixIntKernelType.CONT_RELAX, + ) + + # Set training data + # obj 1 + sm1.set_training_values(xt_LF[:, :], y1t_LF, name=0) + sm1.set_training_values(xt_HF[:, :], y1t_HF) + # obj 2 + sm2.set_training_values(xt_LF[:, :], y2t_LF, name=0) + sm2.set_training_values(xt_HF[:, :], y2t_HF) + + # Train the models: + import time + + t0 = time.time() + sm1.train() + t1 = time.time() + sm2.train() + t2 = time.time() + print("t1-t0=", t1 - t0) + print("t2-t1=", t2 - t1) + + # Compute errors: + # obj 1 + y1_predict = np.ravel(sm1.predict_values(x_valid)) + if "mfk" in krg_method: + y1_predict_LF = np.ravel(sm1._predict_intermediate_values(x_valid, 1)) + y1_var_predict_LF = np.ravel(sm1.predict_variances_all_levels(x_valid)) + y1_var_predict = 
np.ravel(sm1.predict_variances(x_valid)) + y1_sd_predict = np.sqrt(y1_var_predict) + # obj 2 + y2_predict = np.ravel(sm2.predict_values(x_valid)) + if "mfk" in krg_method: + y2_predict_LF = np.ravel(sm2._predict_intermediate_values(x_valid, 1)) + y2_var_predict_LF = np.ravel(sm2.predict_variances_all_levels(x_valid)) + y2_var_predict = np.ravel(sm2.predict_variances(x_valid)) + y2_sd_predict = np.sqrt(y2_var_predict) + + # obj 1: + # print("y1_predict - y1_valid =", y1_predict - y1_valid) + print("norme2(y1_predict - y1_valid) =", npl.norm(y1_predict - y1_valid, 2)) + if "mfk" in krg_method: + print( + "norme2(y1_predict_LF - y1_valid_LF) =", + npl.norm(y1_predict_LF - y1_valid_LF, 2), + ) + # print("y1_sd_predict =", y1_sd_predict) + + # obj 2: + # print("y2_predict - y2_valid =", y2_predict - y2_valid) + print("norme2(y2_predict - y2_valid) =", npl.norm(y2_predict - y2_valid, 2)) + if "mfk" in krg_method: + print( + "norme2(y2_predict_LF - y2_valid_LF) =", + npl.norm(y2_predict_LF - y2_valid_LF, 2), + ) + # print("y2_sd_predict =", y2_sd_predict) + + # PLOTS: + plt.figure() + plt.title("y1") + plt.xlabel("y1_valid") + plt.ylabel("y1_predict") + plt.scatter(y1_valid, y1_predict) + plt.plot(np.linspace(0, 1.5, 100), np.linspace(0, 1.5, 100), color="red") + plt.show() + + plt.figure() + plt.title("y2") + plt.xlabel("y2_valid") + plt.ylabel("y2_predict") + plt.scatter(y2_valid, y2_predict) + plt.plot(np.linspace(0, 9, 100), np.linspace(0, 9, 100), color="red") + plt.show() + + if "mfk" in krg_method: + plt.figure() + plt.title("y1") + plt.xlabel("y1_valid_LF") + plt.ylabel("y1_predict_LF") + plt.scatter(y1_valid_LF, y1_predict_LF) + plt.plot( + np.linspace(0, 1.5, 100), np.linspace(0, 1.5, 100), color="red" + ) + plt.show() + + plt.figure() + plt.title("y2") + plt.xlabel("y2_valid_LF") + plt.ylabel("y2_predict_LF") + plt.scatter(y2_valid_LF, y2_predict_LF) + plt.plot(np.linspace(0, 9, 100), np.linspace(0, 9, 100), color="red") + plt.show() + + # ------------------------------------------------------------------------------ + + # PLOTS tests: + plt.figure() + plt.title("y1") + plt.xlabel("y1t_HF") + plt.ylabel("np.ravel(sm1.predict_values(xt_HF))") + plt.scatter(y1t_HF, np.ravel(sm1.predict_values(xt_HF))) + plt.plot(np.linspace(0, 1.5, 100), np.linspace(0, 1.5, 100), color="red") + plt.show() + + plt.figure() + plt.title("y2") + plt.xlabel("y2t_HF") + plt.ylabel("np.ravel(sm2.predict_values(xt_HF))") + plt.scatter(y2t_HF, np.ravel(sm2.predict_values(xt_HF))) + plt.plot(np.linspace(0, 9, 100), np.linspace(0, 9, 100), color="red") + plt.show() + + if "mfk" in krg_method: + plt.figure() + plt.title("y1") + plt.xlabel("y1t_LF") + plt.ylabel("np.ravel(sm1._predict_intermediate_values(xt_LF,1))") + plt.scatter( + y1t_LF, np.ravel(sm1._predict_intermediate_values(xt_LF, 1)) + ) + plt.plot( + np.linspace(0, 1.5, 100), np.linspace(0, 1.5, 100), color="red" + ) + plt.show() + + plt.figure() + plt.title("y2") + plt.xlabel("y2t_LF") + plt.ylabel("np.ravel(sm2._predict_intermediate_values(xt_LF,1))") + plt.scatter( + y2t_LF, np.ravel(sm2._predict_intermediate_values(xt_LF, 1)) + ) + plt.plot(np.linspace(0, 9, 100), np.linspace(0, 9, 100), color="red") + plt.show() + # ------------------------------------------------------------------------------ + + +if __name__ == "__main__": + TestMFKmixed().run_mfk_mixed_example() + TestMFKmixed().run_mfkpls_mixed_example() + unittest.main() diff --git a/smt/examples/multi_modal/run_genn_demo.py b/smt/examples/multi_modal/run_genn_demo.py index b915f09f7..9b9edf786 
100644 --- a/smt/examples/multi_modal/run_genn_demo.py +++ b/smt/examples/multi_modal/run_genn_demo.py @@ -14,7 +14,7 @@ import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec -from pyDOE2 import fullfact +from pyDOE3 import fullfact SEED = 101 diff --git a/smt/sampling_methods/lhs.py b/smt/sampling_methods/lhs.py index 7a3001ef8..b3262eec0 100644 --- a/smt/sampling_methods/lhs.py +++ b/smt/sampling_methods/lhs.py @@ -3,9 +3,9 @@ This package is distributed under New BSD license. -LHS sampling; uses the pyDOE2 package. +LHS sampling; uses the pyDOE3 package. """ -from pyDOE2 import lhs +from pyDOE3 import lhs from scipy.spatial.distance import pdist, cdist import numpy as np diff --git a/smt/surrogate_models/__init__.py b/smt/surrogate_models/__init__.py index 49877863f..dfa3ff4f3 100644 --- a/smt/surrogate_models/__init__.py +++ b/smt/surrogate_models/__init__.py @@ -6,6 +6,7 @@ from .kplsk import KPLSK from .genn import GENN from .mgp import MGP +from .sgp import SGP from .krg_based import MixIntKernelType from smt.utils.design_space import ( diff --git a/smt/surrogate_models/krg_based.py b/smt/surrogate_models/krg_based.py index 2b3aa1ddb..fa566fdfc 100644 --- a/smt/surrogate_models/krg_based.py +++ b/smt/surrogate_models/krg_based.py @@ -343,7 +343,7 @@ def _new_train(self): self.is_acting_points[None] = is_acting # Compute PLS-coefficients (attr of self) and modified X and y (if GEKPLS is used) - if self.name not in ["Kriging", "MGP"]: + if self.name not in ["Kriging", "MGP", "SGP"]: if self.is_continuous: X, y = self._compute_pls(X.copy(), y.copy()) @@ -594,8 +594,16 @@ def _matrix_data_corr( ) = self._initialize_theta(theta, n_levels, cat_features, cat_kernel) # Sampling points X and y - X = self.training_points[None][0][0] - y = self.training_points[None][0][1] + if "MFK" in self.name: + if self._lvl < self.nlvl - 1: + X = self.training_points[self._lvl][0][0] + y = self.training_points[self._lvl][0][1] + elif self._lvl == self.nlvl - 1: + X = self.training_points[None][0][0] + y = self.training_points[None][0][1] + else: + X = self.training_points[None][0][0] + y = self.training_points[None][0][1] if cat_kernel == MixIntKernelType.CONT_RELAX: X_pls_space, _ = design_space.unfold_x(X) @@ -800,6 +808,39 @@ def _reduced_likelihood_function(self, theta): noise = tmp_var[-1] if not (self.is_continuous): dx = self.D + + if self.options["categorical_kernel"] == MixIntKernelType.CONT_RELAX: + if "MFK" in self.name: + if ( + self._lvl == self.nlvl - 1 + ): # highest fidelity identified by the key None + X2, _ = self.design_space.unfold_x( + self.training_points[None][0][0] + ) + self.X2_norma[str(self._lvl)] = ( + X2 - self.X2_offset + ) / self.X2_scale + dx, _ = cross_distances(self.X2_norma[str(self._lvl)]) + elif self._lvl < self.nlvl - 1: + X2, _ = self.design_space.unfold_x( + self.training_points[self._lvl][0][0] + ) + self.X2_norma[str(self._lvl)] = ( + X2 - self.X2_offset + ) / self.X2_scale + dx, _ = cross_distances(self.X2_norma[str(self._lvl)]) + else: + X2, _ = self.design_space.unfold_x(self.training_points[None][0][0]) + ( + self.X2_norma, + _, + self.X2_offset, + _, + self.X2_scale, + _, + ) = standardization(X2, self.training_points[None][0][1]) + dx, _ = cross_distances(self.X2_norma) + r = self._matrix_data_corr( corr=self.options["corr"], design_space=self.design_space, diff --git a/smt/surrogate_models/sgp.py b/smt/surrogate_models/sgp.py new file mode 100644 index 000000000..cb294d60a --- /dev/null +++ b/smt/surrogate_models/sgp.py @@ 
-0,0 +1,318 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 30 09:30:40 2023 + +@author: hvalayer, rlafage + +This implementation in SMT is derived from FITC/VarDTC methods from +Sparse GP implementations of GPy project. See https://github.com/SheffieldML/GPy +""" + +import numpy as np +from scipy import linalg + +from smt.surrogate_models.krg import KRG +from smt.utils.checks import ensure_2d_array +from smt.utils.kriging import differences +from smt.utils.misc import standardization + + +class SGP(KRG): + name = "SGP" + + def _initialize(self): + super()._initialize() + + declare = self.options.declare + declare( + "corr", + "squar_exp", # gaussian kernel only + values=("squar_exp"), + desc="Correlation function type", + types=(str), + ) + declare( + "poly", + "constant", # constant mean function + values=("constant"), + desc="Regression function type", + types=(str), + ) + declare( + "theta_bounds", + [1e-6, 1e2], # upper bound increased compared to kriging-based one + types=(list, np.ndarray), + desc="bounds for hyperparameters", + ) + declare( + "noise0", + [1e-2], + desc="Gaussian noise on observed training data", + types=(list, np.ndarray), + ) + declare( + "eval_noise", + True, + types=bool, + values=(True, False), + desc="Noise is always evaluated", + ) + declare( + "nugget", + 1e-8, # increased compared to kriging-based one + types=(float), + desc="a jitter for numerical stability", + ) + declare( + "method", + "FITC", + values=("FITC", "VFE"), + desc="Method used by sparse GP model", + types=(str), + ) + declare("n_inducing", 10, desc="Number of inducing inputs", types=int) + + supports = self.supports + supports["derivatives"] = False + supports["variances"] = True + supports["variance_derivatives"] = False + supports["x_hierarchy"] = False + + self.Z = None + self.woodbury_data = {"vec": None, "inv": None} + self.optimal_par = {} + self.optimal_noise = None + + def set_inducing_inputs(self, Z=None, normalize=False): + """ + Define number of inducing inputs or set the locations manually. + When Z is not specified then points are picked randomly amongst the inputs training set. + + Parameters + ---------- + nz : int, optional + Number of inducing inputs. + Z : np.ndarray [M,ndim], optional + Inducing inputs. 
+ normalize : When Z is given, whether values should be normalized + """ + if Z is None: + self.nz = self.options["n_inducing"] + X = self.training_points[None][0][0] # [nt,nx] + random_idx = np.random.permutation(self.nt)[: self.nz] + self.Z = X[random_idx].copy() # [nz,nx] + else: + Z = ensure_2d_array(Z, "Z") + if self.nx != Z.shape[1]: + raise ValueError("DimensionError: Z.shape[1] != X.shape[1]") + self.Z = Z # [nz,nx] + if normalize: + X = self.training_points[None][0][0] # [nt,nx] + y = self.training_points[None][0][1] + self.normalize = True + ( + _, + _, + X_offset, + _, + X_scale, + _, + ) = standardization(X, y) + self.Z = (self.Z - X_offset) / X_scale + else: + self.normalize = False + self.nz = Z.shape[0] + + # overload kriging based implementation + def _new_train(self): + if self.Z is None: + self.set_inducing_inputs() + + # make sure the latent function is scalars + Y = self.training_points[None][0][1] + _, output_dim = Y.shape + if output_dim > 1: + raise NotImplementedError("FITC does not support vector-valued function") + + # make sure the noise is not hetero + eta2 = np.array(self.options["noise0"]) # likelihood.gaussian_variance(Y_norma) + if eta2.size > 1: + raise NotImplementedError("FITC does not support heteroscedastic noise") + + return super()._new_train() + + # overload kriging based implementation + def _reduced_likelihood_function(self, theta): + X = self.training_points[None][0][0] + Y = self.training_points[None][0][1] + Z = self.Z + + if self.options["eval_noise"]: + sigma2 = theta[-1] + theta = theta[0:-1] + else: + sigma2 = self.options["noise0"] + + nugget = self.options["nugget"] + + if self.options["method"] == "VFE": + likelihood, w_vec, w_inv = self._vfe(X, Y, Z, theta, sigma2, nugget) + else: + likelihood, w_vec, w_inv = self._fitc(X, Y, Z, theta, sigma2, nugget) + + self.woodbury_data["vec"] = w_vec + self.woodbury_data["inv"] = w_inv + + params = { + "theta": theta, + "sigma2": sigma2, + } + + return likelihood, params + + def _compute_K(self, A: np.ndarray, B: np.ndarray, theta, sigma2): + """ + Compute the covariance matrix K between A and B + """ + # Compute pairwise componentwise L1-distances between A and B + dx = differences(A, B) + d = self._componentwise_distance(dx) + # Compute the correlation vector r and matrix R + r = self._correlation_types[self.options["corr"]](theta, d) + R = r.reshape(A.shape[0], B.shape[0]) + # Compute the covariance matrix K + K = sigma2 * R + return K + + def _fitc(self, X, Y, Z, theta, sigma2, nugget): + """ + FITC method implementation. 
+ + See also https://github.com/SheffieldML/GPy/blob/9ec3e50e3b96a10db58175a206ed998ec5a8e3e3/GPy/inference/latent_function_inference/fitc.py + """ + + # Compute: diag(Knn), Kmm and Kmn + Knn = np.full(self.nt, sigma2) + Kmm = self._compute_K(Z, Z, theta, sigma2) + np.eye(self.nz) * nugget + Kmn = self._compute_K(Z, X, theta, sigma2) + + # Compute (lower) Cholesky decomposition: Kmm = U U^T + U = linalg.cholesky(Kmm, lower=True) + + # Compute (upper) Cholesky decomposition: Qnn = V^T V + Ui = linalg.inv(U) + V = Ui @ Kmn + + # Assumption on the gaussian noise on training outputs + eta2 = np.array(self.options["noise0"]) + + # Compute diagonal correction: nu = Knn_diag - Qnn_diag + \eta^2 + nu = Knn - np.sum(np.square(V), 0) + eta2 + # Compute beta, the effective noise precision + beta = 1.0 / nu + + # Compute (lower) Cholesky decomposition: A = I + V diag(beta) V^T = L L^T + A = np.eye(self.nz) + V @ np.diag(beta) @ V.T + L = linalg.cholesky(A, lower=True) + Li = linalg.inv(L) + + # Compute a and b + a = np.einsum("ij,i->ij", Y, beta) # avoid reshape for mat-vec multiplication + b = Li @ V @ a + + # Compute marginal log-likelihood + likelihood = -0.5 * ( + # num_data * np.log(2.0 * np.pi) # constant term ignored in reduced likelihood + +np.sum(np.log(nu)) + + 2.0 * np.sum(np.log(np.diag(L))) + + a.T @ Y + - np.einsum("ij,ij->", b, b) + ) + + # Store Woodbury vectors for prediction step + LiUi = Li @ Ui + LiUiT = LiUi.T + woodbury_vec = LiUiT @ b + woodbury_inv = Ui.T @ Ui - LiUiT @ LiUi + + return likelihood, woodbury_vec, woodbury_inv + + def _vfe(self, X, Y, Z, theta, sigma2, nugget): + """ + VFE method implementation. + + See also https://github.com/SheffieldML/GPy/blob/9ec3e50e3b96a10db58175a206ed998ec5a8e3e3/GPy/inference/latent_function_inference/fitc.py + """ + + # Assume zero mean function + mean = 0 + Y -= mean + + # Compute: diag(Knn), Kmm and Kmn + Kmm = self._compute_K(Z, Z, theta, sigma2) + np.eye(self.nz) * nugget + Kmn = self._compute_K(Z, X, theta, sigma2) + + # Compute (lower) Cholesky decomposition: Kmm = U U^T + U = linalg.cholesky(Kmm, lower=True) + + # Compute (upper) Cholesky decomposition: Qnn = V^T V + Ui = linalg.inv(U) + V = Ui @ Kmn + + # Compute beta, the effective noise precision + beta = 1.0 / np.fmax(self.options["noise0"], nugget) + + # Compute A = beta * V @ V.T + A = beta * V @ V.T + + # Compute (lower) Cholesky decomposition: B = I + A = L L^T + B = np.eye(self.nz) + A + L = linalg.cholesky(B, lower=True) + Li = linalg.inv(L) + + # Compute b + b = beta * Li @ V @ Y + + # Compute log-marginal likelihood + likelihood = -0.5 * ( + # self.nt * np.log(2.0 * np.pi) # constant term ignored in reduced likelihood + -self.nt * np.log(beta) + + 2.0 * np.sum(np.log(np.diag(L))) + + beta * np.einsum("ij,ij->", Y, Y) + - b.T @ b + + self.nt * beta * sigma2 + - np.trace(A) + ) + + # Store Woodbury vectors for prediction step + LiUi = Li @ Ui + Bi = np.eye(self.nz) + Li.T @ Li + woodbury_vec = LiUi.T @ b + woodbury_inv = Ui.T @ Bi @ Ui + + return likelihood, woodbury_vec, woodbury_inv + + # overload kriging based implementation + def _predict_values(self, x: np.ndarray, is_acting=None) -> np.ndarray: + """ + Evaluates the model at a set of points using the Woodbury vector. 
+        """
+        Kx = self._compute_K(
+            x, self.Z, self.optimal_par["theta"], self.optimal_par["sigma2"]
+        )
+        mu = Kx @ self.woodbury_data["vec"]
+        return mu
+
+    # overload kriging based implementation
+    def _predict_variances(self, x: np.ndarray, is_acting=None) -> np.ndarray:
+        """
+        Evaluates the model at a set of points using the inverse Woodbury vector.
+        """
+        Kx = self._compute_K(
+            self.Z, x, self.optimal_par["theta"], self.optimal_par["sigma2"]
+        )
+        Kxx = np.full(x.shape[0], self.optimal_par["sigma2"])
+        var = (Kxx - np.sum(np.dot(self.woodbury_data["inv"].T, Kx) * Kx, 0))[:, None]
+        var = np.clip(var, 1e-15, np.inf)
+        var += self.options["noise0"]
+        return var
diff --git a/smt/surrogate_models/tests/test_sgp.py b/smt/surrogate_models/tests/test_sgp.py
new file mode 100644
index 000000000..2ca5822ba
--- /dev/null
+++ b/smt/surrogate_models/tests/test_sgp.py
@@ -0,0 +1,60 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import unittest
+
+from smt.surrogate_models import SGP
+from smt.utils.sm_test_case import SMTestCase
+from smt.utils import compute_rms_error
+
+
+def f_obj(x):
+    return (
+        np.sin(3 * np.pi * x)
+        + 0.3 * np.cos(9 * np.pi * x)
+        + 0.5 * np.sin(7 * np.pi * x)
+    )
+
+
+class TestSGP(SMTestCase):
+    def setUp(self):
+        rng = np.random.RandomState(1)
+
+        # Generate training data
+        N_train = 200
+        self.eta = [0.01]
+        gaussian_noise = rng.normal(loc=0.0, scale=np.sqrt(self.eta), size=(N_train, 1))
+        self.Xtrain = 2 * rng.rand(N_train, 1) - 1
+        self.Ytrain = f_obj(self.Xtrain) + gaussian_noise
+
+        # Generate test data (noise-free)
+        N_test = 50
+        self.Xtest = 2 * rng.rand(N_test, 1) - 1
+        self.Ytest = f_obj(self.Xtest).reshape(-1, 1)
+
+        # Pick inducing points at random
+        N_inducing = 30
+        self.Z = 2 * rng.rand(N_inducing, 1) - 1
+
+    def test_fitc(self):
+        # Assume we know the variance eta of our noisy training data
+        sgp = SGP(noise0=self.eta)
+        sgp.set_training_values(self.Xtrain, self.Ytrain)
+        sgp.set_inducing_inputs(Z=self.Z)
+        sgp.train()
+
+        Ypred = sgp.predict_values(self.Xtest)
+        self.assert_error(Ypred, self.Ytest, atol=0.02, rtol=0.1)
+
+    def test_vfe(self):
+        # Assume we know the variance eta of our noisy training data
+        sgp = SGP(noise0=self.eta, method="VFE")
+        sgp.set_training_values(self.Xtrain, self.Ytrain)
+        sgp.set_inducing_inputs(Z=self.Z)
+        sgp.train()
+
+        Ypred = sgp.predict_values(self.Xtest)
+        self.assert_error(Ypred, self.Ytest, atol=0.05, rtol=0.1)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/smt/surrogate_models/tests/test_surrogate_model_examples.py b/smt/surrogate_models/tests/test_surrogate_model_examples.py
index b6adf44ed..5b892fc36 100644
--- a/smt/surrogate_models/tests/test_surrogate_model_examples.py
+++ b/smt/surrogate_models/tests/test_surrogate_model_examples.py
@@ -620,6 +620,113 @@ def fun(x):
         fig.subplots_adjust(top=0.74)
         plt.show()
 
+    def test_sgp_fitc(self):
+        import numpy as np
+        import matplotlib.pyplot as plt
+
+        from smt.surrogate_models import SGP
+
+        def f_obj(x):
+            import numpy as np
+
+            return (
+                np.sin(3 * np.pi * x)
+                + 0.3 * np.cos(9 * np.pi * x)
+                + 0.5 * np.sin(7 * np.pi * x)
+            )
+
+        # random generator for reproducibility
+        rng = np.random.RandomState(0)
+
+        # Generate training data
+        nt = 200
+        # Variance of the Gaussian noise on our training data
+        eta2 = [0.01]
+        gaussian_noise = rng.normal(loc=0.0, scale=np.sqrt(eta2), size=(nt, 1))
+        xt = 2 * rng.rand(nt, 1) - 1
+        yt = f_obj(xt) + gaussian_noise
+
+        # Pick inducing points randomly in training data
+        n_inducing = 30
+        random_idx = rng.permutation(nt)[:n_inducing]
+        Z = xt[random_idx].copy()
+
+        sgp = SGP(noise0=eta2)  # Assume here we have an idea of the variance eta2
+        sgp.set_training_values(xt, yt)
+        sgp.set_inducing_inputs(Z=Z)
+        # sgp.set_inducing_inputs()  # When Z is not specified, inducing points are picked randomly in the training data
+        sgp.train()
+
+        x = np.linspace(-1, 1, nt + 1).reshape(-1, 1)
+        y = f_obj(x)
+        hat_y = sgp.predict_values(x)
+        var = sgp.predict_variances(x)
+
+        # plot prediction
+        plt.figure(figsize=(14, 6))
+        plt.plot(x, y, "C1-", label="target function")
+        plt.scatter(xt, yt, marker="o", s=10, label="observed data")
+        plt.plot(x, hat_y, "k-", label="Sparse GP")
+        plt.plot(x, hat_y - 3 * np.sqrt(var), "k--")
+        plt.plot(x, hat_y + 3 * np.sqrt(var), "k--", label="99% CI")
+        plt.plot(Z, -2.9 * np.ones_like(Z), "r|", mew=2, label="inducing points")
+        plt.ylim([-3, 3])
+        plt.legend(loc=0)
+        plt.show()
+
+    def test_sgp_vfe(self):
+        import numpy as np
+        import matplotlib.pyplot as plt
+
+        from smt.surrogate_models import SGP
+
+        def f_obj(x):
+            import numpy as np
+
+            return (
+                np.sin(3 * np.pi * x)
+                + 0.3 * np.cos(9 * np.pi * x)
+                + 0.5 * np.sin(7 * np.pi * x)
+            )
+
+        # random generator for reproducibility
+        rng = np.random.RandomState(42)
+
+        # Generate training data
+        nt = 200
+        # Variance of the Gaussian noise on our training data
+        eta2 = [0.01]
+        gaussian_noise = rng.normal(loc=0.0, scale=np.sqrt(eta2), size=(nt, 1))
+        xt = 2 * rng.rand(nt, 1) - 1
+        yt = f_obj(xt) + gaussian_noise
+
+        # Pick inducing points randomly in training data
+        n_inducing = 30
+        random_idx = rng.permutation(nt)[:n_inducing]
+        Z = xt[random_idx].copy()
+
+        sgp = SGP(noise0=eta2, method="VFE")
+        sgp.set_training_values(xt, yt)
+        sgp.set_inducing_inputs(Z=Z)
+        sgp.train()
+
+        x = np.linspace(-1, 1, nt + 1).reshape(-1, 1)
+        y = f_obj(x)
+        hat_y = sgp.predict_values(x)
+        var = sgp.predict_variances(x)
+
+        # plot prediction
+        plt.figure(figsize=(14, 6))
+        plt.plot(x, y, "C1-", label="target function")
+        plt.scatter(xt, yt, marker="o", s=10, label="observed data")
+        plt.plot(x, hat_y, "k-", label="Sparse GP")
+        plt.plot(x, hat_y - 3 * np.sqrt(var), "k--")
+        plt.plot(x, hat_y + 3 * np.sqrt(var), "k--", label="99% CI")
+        plt.plot(Z, -2.9 * np.ones_like(Z), "r|", mew=2, label="inducing points")
+        plt.ylim([-3, 3])
+        plt.legend(loc=0)
+        plt.show()
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/smt/utils/kriging.py b/smt/utils/kriging.py
index 441a83c48..0f600f097 100644
--- a/smt/utils/kriging.py
+++ b/smt/utils/kriging.py
@@ -10,7 +10,7 @@
 import os
 
 from sklearn.cross_decomposition import PLSRegression as pls
-from pyDOE2 import bbdesign
+from pyDOE3 import bbdesign
 from sklearn.metrics.pairwise import check_pairwise_arrays
 
 from smt.utils.design_space import BaseDesignSpace, CategoricalVariable