diff --git a/.github/actions/conda-setup/action.yml b/.github/actions/conda-setup/action.yml
index cb665a35..75b7acc0 100644
--- a/.github/actions/conda-setup/action.yml
+++ b/.github/actions/conda-setup/action.yml
@@ -47,13 +47,18 @@ runs:
     - name: Update environment
       shell: bash -l {0}
       run: |
-        if [ -f "${{ inputs.filename }}" ]; then
+        if [ -f "${{ inputs.filename }}" ] && ! [ "${{ steps.cache.outputs.cache-hit }}" ]; then
           mamba env update -n ${{ inputs.env_name }} -f ${{ inputs.filename }}
         else
           echo "No conda environment file found; skipping. Path: ${{ inputs.filename }}"
           mamba install -n ${{ inputs.env_name }} python=${{ inputs.python-version }}
         fi
-      if: steps.cache.outputs.cache-hit != 'true'
+    - name: Install required binaries for MPI
+      shell: bash -l {0}
+      run: |
+        if [ ! -f "${{ inputs.filename }}" ]; then
+          sudo apt-get install -y libopenmpi-dev
+        fi
     - name: Setup the environment
       shell: bash -l {0}
       run: |
diff --git a/environment.yml b/environment.yml
index 3bbc04f3..1991efc9 100644
--- a/environment.yml
+++ b/environment.yml
@@ -14,6 +14,7 @@ dependencies:
   - ipywidgets
   - tqdm
   - orjson
+  - matplotlib
   # parallel
   - mpi4py
   - dask
@@ -24,16 +25,11 @@ dependencies:
   - jupyterlab>=3
   - jupyterlab-lsp
   - python-lsp-server
-  - matplotlib
   - pygments
   - mkdocs
   - mkdocstrings
   - mkdocs-material
-  # NOTE: we are installing mkdocs-jupyter with pip for now
-  # due to the following: https://github.com/conda-forge/mkdocs-jupyter-feedstock/issues/31
-  # - mkdocs-jupyter
+  - mkdocs-jupyter
   - mkdocstrings-python
   - ruff
   - typing-extensions
-  - pip:
-    - mkdocs-jupyter>=0.24.7
diff --git a/pyproject.toml b/pyproject.toml
index 24691224..77bf115a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,8 +17,17 @@ classifiers = [
     "Topic :: Scientific/Engineering",
 ]
 dependencies = [
-    # All core dependencies must be sourced from conda (conda-forge).
-    # See ``environment.yml`` for further information.
+    "deap",
+    "numpy",
+    "pydantic>=2.3",
+    "pyyaml",
+    "botorch>=0.9.2,<=0.10.0",
+    "scipy>=1.10.1",
+    "pandas",
+    "ipywidgets",
+    "tqdm",
+    "orjson",
+    "matplotlib"
 ]
 description = "Flexible optimization of arbitrary problems in Python."
dynamic = [ "version" ] @@ -31,6 +40,15 @@ requires-python = ">=3.9" dev = [ "pytest", "pytest-cov", + "ffmpeg", + "pytest", + "pytest-cov", + "jupyterlab>=3", + "jupyterlab-lsp", + "python-lsp-server", + "pygments", + "dask", + "mpi4py" ] doc = [ "mkdocs", diff --git a/xopt/generators/bayesian/bax/visualize.py b/xopt/generators/bayesian/bax/visualize.py index e9539af1..5625a98b 100644 --- a/xopt/generators/bayesian/bax/visualize.py +++ b/xopt/generators/bayesian/bax/visualize.py @@ -103,7 +103,7 @@ def visualize_virtual_objective( bounds = generator._get_optimization_bounds() kwargs = kwargs if kwargs else {} objective_values = generator.algorithm.evaluate_virtual_objective( - bax_model, x, bounds, tkwargs=generator._tkwargs, n_samples=n_samples, **kwargs + bax_model, x, bounds, tkwargs=generator.tkwargs, n_samples=n_samples, **kwargs ) # get sample stats diff --git a/xopt/generators/bayesian/bayesian_generator.py b/xopt/generators/bayesian/bayesian_generator.py index 452e273c..c25d1d1b 100644 --- a/xopt/generators/bayesian/bayesian_generator.py +++ b/xopt/generators/bayesian/bayesian_generator.py @@ -4,13 +4,14 @@ import warnings from abc import ABC, abstractmethod from copy import deepcopy -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union import pandas as pd import torch from botorch.acquisition import FixedFeatureAcquisitionFunction, qUpperConfidenceBound from botorch.models.model import Model from botorch.sampling import get_sampler +from botorch.utils.multi_objective import is_non_dominated from botorch.utils.multi_objective.box_decompositions import DominatedPartitioning from gpytorch import Module from pydantic import Field, field_validator, PositiveInt, SerializeAsAny @@ -384,7 +385,7 @@ def train_model(self, data: pd.DataFrame = None, update_internal=True) -> Module self.vocs.output_names, data, {name: variable_bounds[name] for name in self.model_input_names}, - **self._tkwargs, + **self.tkwargs, ) if update_internal: @@ -407,8 +408,18 @@ def propose_candidates(self, model, n_candidates=1): # get acquisition function acq_funct = self.get_acquisition(model) - # get candidates - candidates = self.numerical_optimizer.optimize(acq_funct, bounds, n_candidates) + # get initial candidates to start acquisition function optimization + initial_points = self._get_initial_conditions(n_candidates) + + # get candidates -- grid optimizer does not support batch_initial_conditions + if isinstance(self.numerical_optimizer, GridOptimizer): + candidates = self.numerical_optimizer.optimize( + acq_funct, bounds, n_candidates + ) + else: + candidates = self.numerical_optimizer.optimize( + acq_funct, bounds, n_candidates, batch_initial_conditions=initial_points + ) return candidates def get_training_data(self, data: pd.DataFrame) -> pd.DataFrame: @@ -456,7 +467,7 @@ def get_input_data(self, data: pd.DataFrame) -> torch.Tensor: input names (variables), and the resulting tensor is configured with the data type and device settings from the generator. 
""" - return torch.tensor(data[self.model_input_names].to_numpy(), **self._tkwargs) + return torch.tensor(data[self.model_input_names].to_numpy(), **self.tkwargs) def get_acquisition(self, model): """ @@ -536,6 +547,11 @@ def visualize_model(self, **kwargs): """displays the GP models""" return visualize_generator_model(self, **kwargs) + def _get_initial_conditions(self, n_candidates=1) -> Union[Tensor, None]: + """overwrite if algorithm should specifiy initial candidates for optimizing + the acquisition function""" + return None + def _process_candidates(self, candidates: Tensor): """process pytorch candidates from optimizing the acquisition function""" logger.debug(f"Best candidate from optimize {candidates}") @@ -581,7 +597,7 @@ def _get_objective(self): return self.custom_objective else: - return create_mc_objective(self.vocs, self._tkwargs) + return create_mc_objective(self.vocs, self.tkwargs) def _get_constraint_callables(self): """return constratint callable determined by vocs""" @@ -591,7 +607,7 @@ def _get_constraint_callables(self): return constraint_callables @property - def _tkwargs(self): + def tkwargs(self): # set device and data type for generator device = "cpu" if self.use_cuda: @@ -627,7 +643,7 @@ def _candidate_names(self): def _get_bounds(self): """convert bounds from vocs to torch tensors""" - return torch.tensor(self.vocs.bounds, **self._tkwargs) + return torch.tensor(self.vocs.bounds, **self.tkwargs) def _get_optimization_bounds(self): """ @@ -720,7 +736,7 @@ def _get_max_travel_distances_region(self, bounds): "from, add data first to use during BO" ) last_point = torch.tensor( - self.data[self.vocs.variable_names].iloc[-1].to_numpy(), **self._tkwargs + self.data[self.vocs.variable_names].iloc[-1].to_numpy(), **self.tkwargs ) # bound lengths based on vocs for normalization @@ -728,8 +744,8 @@ def _get_max_travel_distances_region(self, bounds): # get maximum travel distances max_travel_distances = torch.tensor( - self.max_travel_distances, **self._tkwargs - ) * torch.tensor(lengths, **self._tkwargs) + self.max_travel_distances, **self.tkwargs + ) * torch.tensor(lengths, **self.tkwargs) max_travel_bounds = torch.stack( (last_point - max_travel_distances, last_point + max_travel_distances) ) @@ -774,33 +790,54 @@ def torch_reference_point(self): supported" ) - return torch.tensor(pt, **self._tkwargs) + return torch.tensor(pt, **self.tkwargs) - def calculate_hypervolume(self): - """compute hypervolume given data""" - objective_data = torch.tensor( - self.vocs.objective_data(self.data, return_raw=True).to_numpy() + def _get_scaled_data(self): + """get scaled input/objective data for use with botorch logic which assumes + maximization for each objective""" + var_df, obj_df, _, _ = self.vocs.extract_data( + self.data, return_valid=True, return_raw=True ) - # hypervolume must only take into account feasible data points - if self.vocs.n_constraints > 0: - objective_data = objective_data[ - self.vocs.feasibility_data(self.data)["feasible"].to_list() - ] + variable_data = torch.tensor(var_df[self.vocs.variable_names].to_numpy()) + objective_data = torch.tensor(obj_df[self.vocs.objective_names].to_numpy()) + weights = set_botorch_weights(self.vocs).to(**self.tkwargs)[ + : self.vocs.n_objectives + ] + return variable_data, objective_data * weights, weights - n_objectives = self.vocs.n_objectives - weights = torch.zeros(n_objectives) - weights = set_botorch_weights(self.vocs).to(**self._tkwargs) - objective_data = objective_data * weights + def calculate_hypervolume(self): + 
"""compute hypervolume given data""" # compute hypervolume bd = DominatedPartitioning( - ref_point=self.torch_reference_point, Y=objective_data + ref_point=self.torch_reference_point, Y=self._get_scaled_data()[1] ) volume = bd.compute_hypervolume().item() return volume + def get_pareto_front(self): + """compute the pareto front x/y values given data""" + variable_data, objective_data, weights = self._get_scaled_data() + obj_data = torch.vstack( + (self.torch_reference_point.unsqueeze(0), objective_data) + ) + var_data = torch.vstack( + ( + torch.full_like(variable_data[0], float("Nan")).unsqueeze(0), + variable_data, + ) + ) + non_dominated = is_non_dominated(obj_data) + + # note need to undo weights for real number output + # only return values if non nan values exist + if torch.all(torch.isnan(var_data[non_dominated])): + return None, None + else: + return var_data[non_dominated], obj_data[non_dominated] / weights + def formatted_base_docstring(): return "\nBase Generator\n---------------\n" + BayesianGenerator.__doc__ diff --git a/xopt/generators/bayesian/expected_improvement.py b/xopt/generators/bayesian/expected_improvement.py index 1267b271..8c179b4a 100644 --- a/xopt/generators/bayesian/expected_improvement.py +++ b/xopt/generators/bayesian/expected_improvement.py @@ -40,7 +40,7 @@ def _get_acquisition(self, model): # analytic acquisition function for single candidate generation with # basic objective # note that the analytic version cannot handle custom objectives - weights = set_botorch_weights(self.vocs).to(**self._tkwargs) + weights = set_botorch_weights(self.vocs).to(**self.tkwargs) posterior_transform = ScalarizedPosteriorTransform(weights) acq = ExpectedImprovement( model, best_f=best_f, posterior_transform=posterior_transform @@ -52,14 +52,12 @@ def _get_best_f(self, data, objective): """get best function value for EI based on the objective""" if isinstance(objective, CustomXoptObjective): best_f = objective( - torch.tensor( - self.vocs.observable_data(data).to_numpy(), **self._tkwargs - ) + torch.tensor(self.vocs.observable_data(data).to_numpy(), **self.tkwargs) ).max() else: # analytic acquisition function for single candidate generation best_f = -torch.tensor( - self.vocs.objective_data(data).min().values, **self._tkwargs + self.vocs.objective_data(data).min().values, **self.tkwargs ) return best_f diff --git a/xopt/generators/bayesian/mggpo.py b/xopt/generators/bayesian/mggpo.py index 31968db6..24112638 100644 --- a/xopt/generators/bayesian/mggpo.py +++ b/xopt/generators/bayesian/mggpo.py @@ -32,7 +32,7 @@ def propose_candidates(self, model, n_candidates=1): ga_candidates = self.ga_generator.generate(n_candidates * 10) ga_candidates = pd.DataFrame(ga_candidates)[self.vocs.variable_names].to_numpy() ga_candidates = torch.unique( - torch.tensor(ga_candidates, **self._tkwargs), dim=0 + torch.tensor(ga_candidates, **self.tkwargs), dim=0 ).reshape(-1, 1, self.vocs.n_variables) if ga_candidates.shape[0] < n_candidates: diff --git a/xopt/generators/bayesian/mobo.py b/xopt/generators/bayesian/mobo.py index c82df309..7e399999 100644 --- a/xopt/generators/bayesian/mobo.py +++ b/xopt/generators/bayesian/mobo.py @@ -1,16 +1,29 @@ +from typing import Union + +import torch from botorch.acquisition import FixedFeatureAcquisitionFunction from botorch.acquisition.multi_objective import qNoisyExpectedHypervolumeImprovement from botorch.acquisition.multi_objective.logei import ( qLogNoisyExpectedHypervolumeImprovement, ) +from botorch.utils import draw_sobol_samples +from pydantic import 
diff --git a/xopt/generators/bayesian/multi_fidelity.py b/xopt/generators/bayesian/multi_fidelity.py
index 1bc97d16..d7e19488 100644
--- a/xopt/generators/bayesian/multi_fidelity.py
+++ b/xopt/generators/bayesian/multi_fidelity.py
@@ -155,7 +155,7 @@ def get_optimum(self):
         """select the best point at the maximum fidelity"""
 
         # define single objective based on vocs
-        weights = torch.zeros(self.vocs.n_outputs, **self._tkwargs)
+        weights = torch.zeros(self.vocs.n_outputs, **self.tkwargs)
         for idx, ele in enumerate(self.vocs.objective_names):
             if self.vocs.objectives[ele] == "MINIMIZE":
                 weights[idx] = -1.0
diff --git a/xopt/generators/bayesian/time_dependent.py b/xopt/generators/bayesian/time_dependent.py
index 2d5ab922..2ed89d52 100644
--- a/xopt/generators/bayesian/time_dependent.py
+++ b/xopt/generators/bayesian/time_dependent.py
@@ -95,7 +95,7 @@ def get_input_data(self, data: pd.DataFrame) -> torch.Tensor:
         type and device settings from the generator.
""" return torch.tensor( - data[self.model_input_names + ["time"]].to_numpy(), **self._tkwargs + data[self.model_input_names + ["time"]].to_numpy(), **self.tkwargs ) def get_acquisition(self, model): @@ -103,7 +103,7 @@ def get_acquisition(self, model): # identify which column has the `time` attribute column = [-1] - value = torch.tensor(self.target_prediction_time, **self._tkwargs).unsqueeze(0) + value = torch.tensor(self.target_prediction_time, **self.tkwargs).unsqueeze(0) fixed_acq = FixedFeatureAcquisitionFunction( acq, self.vocs.n_variables + 1, column, value ) diff --git a/xopt/generators/bayesian/turbo.py b/xopt/generators/bayesian/turbo.py index d8e3c6cc..ea5b4441 100644 --- a/xopt/generators/bayesian/turbo.py +++ b/xopt/generators/bayesian/turbo.py @@ -1,11 +1,12 @@ import logging import math from abc import ABC, abstractmethod -from typing import Dict, Optional, Union +from typing import Dict, Optional import pandas as pd import torch from pydantic import ConfigDict, Field, PositiveFloat, PositiveInt +from torch import Tensor from xopt.pydantic import XoptBaseModel from xopt.vocs import VOCS @@ -51,7 +52,6 @@ class TurboController(XoptBaseModel, ABC): restrict_model_data: Optional[bool] = Field( True, description="flag to restrict model data to within the trust region" ) - tkwargs: Dict[str, Union[torch.dtype, str]] = Field({"dtype": torch.double}) model_config = ConfigDict(validate_assignment=True, arbitrary_types_allowed=True) @@ -79,9 +79,24 @@ def __init__(self, vocs: VOCS, **kwargs): ) ) - def get_trust_region(self, generator): + def get_trust_region(self, generator) -> Tensor: + """ + + Return the trust region based on the generator. The trust region is a + rectangular region around a center point. The sides of the trust region are + given by the `length` parameter and are scaled according to the generator + model lengthscales (if available). + + Parameters + ---------- + generator : BayesianGenerator + Generator object used to supply the model and datatypes for the returned + trust region. 
+ + """ + model = generator.model - bounds = torch.tensor(self.vocs.bounds, **self.tkwargs) + bounds = torch.tensor(self.vocs.bounds, **generator.tkwargs) if self.center_x is not None: # get bounds width @@ -89,18 +104,22 @@ def get_trust_region(self, generator): # Scale the TR to be proportional to the lengthscales of the objective model x_center = torch.tensor( - [self.center_x[ele] for ele in self.vocs.variable_names], **self.tkwargs + [self.center_x[ele] for ele in self.vocs.variable_names], + **generator.tkwargs, ).unsqueeze(dim=0) - if model.models[0].covar_module.base_kernel.lengthscale is not None: - lengthscales = model.models[ - 0 - ].covar_module.base_kernel.lengthscale.detach() + # default weights are 1 (if there is no model or a model without + # lengthscales) + weights = 1.0 - # calculate the ratios of lengthscales for each axis - weights = lengthscales / torch.prod(lengthscales) ** (1 / self.dim) - else: - weights = 1.0 + if model is not None: + if model.models[0].covar_module.base_kernel.lengthscale is not None: + lengthscales = model.models[ + 0 + ].covar_module.base_kernel.lengthscale.detach() + + # calculate the ratios of lengthscales for each axis + weights = lengthscales / torch.prod(lengthscales) ** (1 / self.dim) # calculate the tr bounding box tr_lb = torch.clamp( diff --git a/xopt/generators/bayesian/utils.py b/xopt/generators/bayesian/utils.py index 9d60ac04..0b78d833 100644 --- a/xopt/generators/bayesian/utils.py +++ b/xopt/generators/bayesian/utils.py @@ -1,3 +1,4 @@ +from copy import deepcopy from typing import List import numpy as np @@ -206,16 +207,19 @@ def validate_turbo_controller_base(value, available_controller_types, info): f"{available_controller_types.keys()}" ) elif isinstance(value, dict): + value_copy = deepcopy(value) # create turbo controller from dict input if "name" not in value: raise ValueError("turbo input dict needs to have a `name` attribute") - name = value.pop("name") + name = value_copy.pop("name") if name in available_controller_types: # pop unnecessary elements for ele in ["dim"]: - value.pop(ele, None) + value_copy.pop(ele, None) - value = available_controller_types[name](vocs=info.data["vocs"], **value) + value = available_controller_types[name]( + vocs=info.data["vocs"], **value_copy + ) else: raise ValueError( f"{value} not found, available values are " diff --git a/xopt/generators/ga/cnsga.py b/xopt/generators/ga/cnsga.py index 701ffac5..6e3d4598 100644 --- a/xopt/generators/ga/cnsga.py +++ b/xopt/generators/ga/cnsga.py @@ -123,7 +123,8 @@ def write_offspring(self, filename=None): return if filename is None: - filename = f"{self.name}_offspring_{xopt.utils.isotime(include_microseconds=True)}.csv" + timestamp = xopt.utils.isotime(include_microseconds=True).replace(":", "_") + filename = f"{self.name}_offspring_{timestamp}.csv" filename = os.path.join(self.output_path, filename) self._offspring.to_csv(filename, index_label="xopt_index") @@ -139,7 +140,8 @@ def write_population(self, filename=None): return if filename is None: - filename = f"{self.name}_population_{xopt.utils.isotime(include_microseconds=True)}.csv" + timestamp = xopt.utils.isotime(include_microseconds=True).replace(":", "_") + filename = f"{self.name}_population_{timestamp}.csv" filename = os.path.join(self.output_path, filename) self.population.to_csv(filename, index_label="xopt_index") diff --git a/xopt/numerical_optimizer.py b/xopt/numerical_optimizer.py index b48e80f1..c697ae78 100644 --- a/xopt/numerical_optimizer.py +++ b/xopt/numerical_optimizer.py @@ -15,7 
diff --git a/xopt/resources/test_functions/zdt.py b/xopt/resources/test_functions/zdt.py
new file mode 100644
index 00000000..cae2a8e9
--- /dev/null
+++ b/xopt/resources/test_functions/zdt.py
@@ -0,0 +1,51 @@
+import numpy as np
+
+from xopt import VOCS
+
+
+def construct_zdt(n_dims, problem_index=1):
+    """Construct Xopt versions of the multi-objective ZDT test problems."""
+    vocs = VOCS(
+        variables={f"x{i + 1}": [0, 1] for i in range(n_dims)},
+        objectives={"f1": "MINIMIZE", "f2": "MINIMIZE"},
+    )
+
+    if problem_index == 1:
+        # ZDT1
+        def evaluate(input_dict):
+            x = np.array([input_dict[f"x{i + 1}"] for i in range(n_dims)])
+
+            f1 = x[0]
+            g = 1 + (9 / (n_dims - 1)) * np.sum(x[1:])
+            h = 1 - np.sqrt(f1 / g)
+            f2 = g * h
+
+            return {"f1": f1, "f2": f2, "g": g}
+    elif problem_index == 2:
+        # ZDT2
+        def evaluate(input_dict):
+            x = np.array([input_dict[f"x{i + 1}"] for i in range(n_dims)])
+
+            f1 = x[0]
+            g = 1 + (9 / (n_dims - 1)) * np.sum(x[1:])
+            h = 1 - (f1 / g) ** 2
+            f2 = g * h
+
+            return {"f1": f1, "f2": f2, "g": g}
+    elif problem_index == 3:
+        # ZDT3
+        def evaluate(input_dict):
+            x = np.array([input_dict[f"x{i + 1}"] for i in range(n_dims)])
+
+            f1 = x[0]
+            g = 1 + (9 / (n_dims - 1)) * np.sum(x[1:])
+            h = 1 - np.sqrt(f1 / g) - (f1 / g) * np.sin(10 * np.pi * f1)
+            f2 = g * h
+
+            return {"f1": f1, "f2": f2, "g": g}
+    else:
+        raise NotImplementedError()
+
+    reference_point = {"f1": 1.0, "f2": 1.0}
+
+    return vocs, evaluate, reference_point
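A hedged sketch of how the new helper might be wired into a run; it mirrors the Evaluator/Xopt usage in the tests below:

```python
from xopt import Evaluator, Xopt
from xopt.generators.bayesian.mobo import MOBOGenerator
from xopt.resources.test_functions.zdt import construct_zdt

vocs, evaluate, reference_point = construct_zdt(n_dims=3, problem_index=1)

X = Xopt(
    generator=MOBOGenerator(vocs=vocs, reference_point=reference_point),
    evaluator=Evaluator(function=evaluate),
    vocs=vocs,
)
X.random_evaluate(3)
X.step()
print(X.generator.calculate_hypervolume())
```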
"x1": [0.1, 0.2, 0.4, 0.4], + "x2": [0.1, 0.2, 0.3, 0.2], + "y1": [1.0, 2.0, 1.0, 0.0], + "y2": [0.5, 0.1, 1.0, 1.5], + } + ) + reference_point = {"y1": 10.0, "y2": 1.5} + gen = MOBOGenerator( + vocs=vocs, + reference_point=reference_point, + use_pf_as_initial_points=True, + ) + gen.add_data(test_data) + + pfx, pfy = gen.get_pareto_front() + assert torch.allclose( + torch.tensor([[0.1, 0.2, 0.4], [0.1, 0.2, 0.2]], dtype=torch.double).T, pfx + ) + assert torch.allclose( + torch.tensor([[1.0, 2.0, 0.0], [0.5, 0.1, 1.5]], dtype=torch.double).T, pfy + ) + + # test with constraints + vocs.constraints = {"c1": ["GREATER_THAN", 0.5]} + test_data = pd.DataFrame( + { + "x1": [0.1, 0.2, 0.4, 0.4], + "x2": [0.1, 0.2, 0.3, 0.2], + "y1": [1.0, 2.0, 1.0, 0.0], + "y2": [0.5, 0.1, 1.0, 1.5], + "c1": [1.0, 1.0, 1.0, 0.0], + } + ) + gen = MOBOGenerator( + vocs=vocs, + reference_point=reference_point, + use_pf_as_initial_points=True, + ) + gen.add_data(test_data) + pfx, pfy = gen.get_pareto_front() + assert torch.allclose( + torch.tensor([[0.1, 0.2], [0.1, 0.2]], dtype=torch.double).T, pfx + ) + assert torch.allclose( + torch.tensor([[1.0, 2.0], [0.5, 0.1]], dtype=torch.double).T, pfy + ) + def test_hypervolume_calculation(self): vocs = deepcopy(TEST_VOCS_BASE) vocs.objectives.update({"y2": "MINIMIZE"}) @@ -71,6 +149,80 @@ def test_hypervolume_calculation(self): assert gen.calculate_hypervolume() == 0.0 + def test_initial_conditions(self): + vocs = deepcopy(TEST_VOCS_BASE) + vocs.objectives.update({"y2": "MINIMIZE"}) + vocs.constraints = {} + + test_data = pd.DataFrame( + { + "x1": [0.1, 0.2, 0.4, 0.4], + "x2": [0.1, 0.2, 0.3, 0.2], + "y1": [1.0, 2.0, 1.0, 0.0], + "y2": [0.5, 0.1, 1.0, 1.5], + } + ) + reference_point = {"y1": 10.0, "y2": 1.5} + gen = MOBOGenerator( + vocs=vocs, + reference_point=reference_point, + use_pf_as_initial_points=True, + ) + gen.numerical_optimizer.max_time = 1.0 + gen.add_data(test_data) + initial_points = gen._get_initial_conditions() + + assert torch.allclose( + torch.tensor([[0.1, 0.2, 0.4], [0.1, 0.2, 0.2]], dtype=torch.double).T, + initial_points[:3].squeeze(), + ) + assert len(initial_points) == gen.numerical_optimizer.n_restarts + gen.generate(1) + + # try with a small number of n_restarts + gen.numerical_optimizer.n_restarts = 1 + initial_points = gen._get_initial_conditions() + assert len(initial_points) == 1 + gen.generate(1) + + # try with no points on the pareto front + gen.reference_point = {"y1": 0.0, "y2": 0.0} + gen.numerical_optimizer.n_restarts = 20 + + initial_points = gen._get_initial_conditions() + assert initial_points is None + gen.generate(1) + + # test with constraints + vocs.constraints = {"c1": ["GREATER_THAN", 0.5]} + test_data = pd.DataFrame( + { + "x1": [0.1, 0.2, 0.4, 0.4, 0.15], + "x2": [0.1, 0.2, 0.3, 0.2, 0.15], + "y1": [1.0, 2.0, 1.0, 0.0, 1.5], + "y2": [0.5, 0.1, 1.0, 1.5, 0.25], + "c1": [1.0, 1.0, 1.0, 1.0, 0.0], + } + ) + gen = MOBOGenerator( + vocs=vocs, + reference_point=reference_point, + use_pf_as_initial_points=True, + ) + gen.add_data(test_data) + gen.numerical_optimizer.max_time = 1.0 + + # make sure that no invalid points make it into the initial conditions + ic = gen._get_initial_conditions() + assert not torch.allclose( + ic[:4], + torch.tensor(((0.1, 0.1), (0.2, 0.2), (0.4, 0.2), (0.15, 0.15))) + .reshape(4, 1, 2) + .double(), + ) + + gen.generate(1) + def test_log_mobo(self): evaluator = Evaluator(function=evaluate_TNK) reference_point = tnk_reference_point @@ -87,6 +239,7 @@ def test_log_mobo(self): dump = ele.model_dump() 
             generator = MOBOGenerator(vocs=tnk_vocs, **dump)
             X = Xopt(generator=generator, evaluator=evaluator, vocs=tnk_vocs)
+            X.generator.numerical_optimizer.max_iter = 1
 
             X.random_evaluate(3)
             X.step()
diff --git a/xopt/tests/generators/bayesian/test_turbo.py b/xopt/tests/generators/bayesian/test_turbo.py
index abea8e70..a5b5280c 100644
--- a/xopt/tests/generators/bayesian/test_turbo.py
+++ b/xopt/tests/generators/bayesian/test_turbo.py
@@ -1,11 +1,15 @@
+import json
 import math
+import os
 from copy import deepcopy
 from unittest import TestCase
 from unittest.mock import patch
 
 import numpy as np
 import pandas as pd
 import pytest
+import yaml
+
 from xopt import Evaluator, VOCS, Xopt
 from xopt.generators.bayesian import UpperConfidenceBoundGenerator
 from xopt.generators.bayesian.bax.algorithms import GridOptimize
@@ -72,6 +76,12 @@ def test_turbo_validation(self):
             vocs=test_vocs, turbo_controller=EntropyTurboController(test_vocs)
         )
 
+        # test validation from a serialized turbo controller
+        gen = BayesianGenerator(vocs=test_vocs, turbo_controller=turbo_controller)
+        gen.add_data(TEST_VOCS_DATA)
+        gen_dict = json.loads(gen.to_json())
+        gen.from_dict(gen_dict | {"vocs": test_vocs})
+
     @patch.multiple(BayesianGenerator, __abstractmethods__=set())
     def test_get_trust_region(self):
         # test in 1D
@@ -341,12 +351,27 @@ def test_serialization(self):
         evaluator = Evaluator(function=sin_function)
         for name in ["optimize", "safety"]:
             generator = UpperConfidenceBoundGenerator(vocs=vocs, turbo_controller=name)
-            X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)
+            X = Xopt(
+                evaluator=evaluator,
+                generator=generator,
+                vocs=vocs,
+                dump_file="dump.yml",
+            )
             yaml_str = X.yaml()
 
             X2 = Xopt.from_yaml(yaml_str)
             assert X2.generator.turbo_controller.name == name
 
+            X2.random_evaluate(3)
+            X2.step()
+
+            config = yaml.safe_load(open("dump.yml"))
+
+            # test restart
+            X3 = Xopt.model_validate(config)
+            X3.random_evaluate(3)
+            X3.step()
+
     def test_entropy_turbo(self):
         # define variables and function objectives
         vocs = VOCS(
@@ -354,7 +379,7 @@ def test_entropy_turbo(self):
             observables=["y1"],
         )
 
-        def sin_function(input_dict):
+        def basic_sin_function(input_dict):
             return {"y1": np.sin(input_dict["x"])}
 
         # Prepare BAX algorithm and generator options
@@ -370,7 +395,7 @@ def test_entropy_turbo(self):
         )
 
         # construct evaluator
-        evaluator = Evaluator(function=sin_function)
+        evaluator = Evaluator(function=basic_sin_function)
 
         # construct Xopt optimizer
         X = Xopt(evaluator=evaluator, generator=generator, vocs=vocs)
@@ -387,3 +412,11 @@ def test_entropy_turbo(self):
             algorithm=algorithm,
             turbo_controller=OptimizeTurboController(vocs),
         )
+
+    @pytest.fixture(scope="module", autouse=True)
+    def clean_up(self):
+        yield
+        files = ["dump.yml"]
+        for f in files:
+            if os.path.exists(f):
+                os.remove(f)
diff --git a/xopt/tests/test_numerical_optimizer.py b/xopt/tests/test_numerical_optimizer.py
index 41271576..024d4e20 100644
--- a/xopt/tests/test_numerical_optimizer.py
+++ b/xopt/tests/test_numerical_optimizer.py
@@ -1,3 +1,4 @@
+import time
 from copy import deepcopy
 from unittest.mock import patch
 
@@ -29,11 +30,13 @@ def test_lbfgs_optimizer(self):
             assert candidates.shape == torch.Size([ncandidate, ndim])
 
         # test max time
-        max_time_optimizer = LBFGSOptimizer(max_time=0.1)
+        max_time_optimizer = LBFGSOptimizer(max_time=0.01)
         ndim = 1
         bounds = torch.stack((torch.zeros(ndim), torch.ones(ndim)))
         for ncandidate in [1, 3]:
+            start_time = time.time()
             candidates = max_time_optimizer.optimize(f, bounds, ncandidate)
+            assert time.time() - start_time < 1.0  # generous margin over max_time
             assert candidates.shape == torch.Size([ncandidate, ndim])
 
     def test_grid_optimizer(self):
diff --git a/xopt/tests/test_vocs.py b/xopt/tests/test_vocs.py
index fcc0486f..f85ff519 100644
--- a/xopt/tests/test_vocs.py
+++ b/xopt/tests/test_vocs.py
@@ -346,3 +346,37 @@ def test_normalize_inputs(self):
             vocs.denormalize_inputs(normed_data)[vocs.variable_names].to_numpy(),
             test_data[vocs.variable_names].to_numpy(),
         ).all()
+
+    def test_extract_data(self):
+        vocs = deepcopy(TEST_VOCS_BASE)
+        test_data = pd.DataFrame(
+            {
+                "x1": [0.1, 0.2, 0.4, 0.4],
+                "x2": [0.1, 0.2, 0.3, 0.2],
+                "c1": [1.0, 0.0, 1.0, 0.0],
+                "y1": [0.5, 0.1, 1.0, 1.5],
+            }
+        )
+
+        # test default functionality
+        data = vocs.extract_data(test_data)
+        assert set(data[0].keys()) == {"x1", "x2"}
+        assert set(data[1].keys()) == {"y1"}
+        assert set(data[2].keys()) == {"c1"}
+        for ele in data[:-1]:  # ignore observable data
+            assert len(ele) == 4
+
+        # test return_valid functionality
+        data = vocs.extract_data(test_data, return_valid=True)
+        assert data[0]["x1"].to_list() == [0.1, 0.4]
+        assert data[1]["y1"].to_list() == [0.5, 1.0]
+
+        for ele in data[:-1]:  # ignore observable data
+            assert len(ele) == 2
+
+        # test w/o constraints
+        vocs = deepcopy(TEST_VOCS_BASE)
+        vocs.constraints = {}
+        data = vocs.extract_data(test_data)
+        assert len(data[0]) == 4
+        assert data[2].empty
diff --git a/xopt/vocs.py b/xopt/vocs.py
index 8dbb4f4a..3b67563c 100644
--- a/xopt/vocs.py
+++ b/xopt/vocs.py
@@ -530,7 +530,7 @@ def validate_input_data(self, input_points: pd.DataFrame) -> None:
         """
         validate_input_data(self, input_points)
 
-    def extract_data(self, data: pd.DataFrame, return_raw=False):
+    def extract_data(self, data: pd.DataFrame, return_raw=False, return_valid=False):
         """
         split dataframe into seperate dataframes for variables, objectives and
         constraints based on vocs - objective data is transformed based on
@@ -542,6 +542,9 @@ def extract_data(self, data: pd.DataFrame, return_raw=False):
             Dataframe to be split
         return_raw : bool, optional
             If True, return untransformed objective data
+        return_valid : bool, optional
+            If True, only return data that satisfies all of the constraint
+            conditions.
 
         Returns
         -------
@@ -555,7 +558,18 @@ def extract_data(self, data: pd.DataFrame, return_raw=False):
         variable_data = self.variable_data(data, "")
         objective_data = self.objective_data(data, "", return_raw)
         constraint_data = self.constraint_data(data, "")
-        return variable_data, objective_data, constraint_data
+        observable_data = self.observable_data(data, "")
+
+        if return_valid:
+            feasible_status = self.feasibility_data(data)["feasible"]
+            return (
+                variable_data[feasible_status],
+                objective_data[feasible_status],
+                constraint_data[feasible_status],
+                observable_data[feasible_status],
+            )
+
+        return variable_data, objective_data, constraint_data, observable_data
 
     def select_best(self, data: pd.DataFrame, n: int = 1):
         """
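Finally, a hedged sketch of the reworked `VOCS.extract_data`, which now returns four dataframes and can filter to feasible rows; the data values are illustrative:

```python
import pandas as pd

from xopt import VOCS

vocs = VOCS(
    variables={"x1": [0, 1]},
    objectives={"y1": "MINIMIZE"},
    constraints={"c1": ["GREATER_THAN", 0.5]},
)
data = pd.DataFrame({"x1": [0.1, 0.4], "y1": [0.5, 1.0], "c1": [1.0, 0.0]})

# the call now returns four dataframes (observables added)
variables, objectives, constraints, observables = vocs.extract_data(data)

# `return_valid=True` keeps only rows satisfying all constraints
valid = vocs.extract_data(data, return_valid=True)
assert len(valid[0]) == 1  # only the first row has c1 > 0.5
```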