
Commit

Merge branch 'main' into generator_standard
roussel-ryan committed Nov 13, 2024
commit fc6342d (2 parents: 4283ae7 + 55cb819)
Showing 20 changed files with 511 additions and 72 deletions.
9 changes: 7 additions & 2 deletions .github/actions/conda-setup/action.yml
@@ -47,13 +47,18 @@ runs:
- name: Update environment
shell: bash -l {0}
run: |
if [ -f "${{ inputs.filename }}" ]; then
if [ -f "${{ inputs.filename }}" ] && ! [ "${{ steps.cache.outputs.cache-hit }}" ] ; then
mamba env update -n ${{ inputs.env_name }} -f ${{ inputs.filename }}
else
echo "No conda environment file found; skipping. Path: ${{ inputs.filename }}"
mamba install -n ${{ inputs.env_name }} python=${{ inputs.python-version }}
fi
if: steps.cache.outputs.cache-hit != 'true'
- name: Install required binaries for MPI
shell: bash -l {0}
run: |
if ! grep -q ${{ inputs.filename }} ; then
sudo apt install libopenmpi-dev
fi
- name: Setup the environment
shell: bash -l {0}
run: |
8 changes: 2 additions & 6 deletions environment.yml
@@ -14,6 +14,7 @@ dependencies:
- ipywidgets
- tqdm
- orjson
- matplotlib
# parallel
- mpi4py
- dask
@@ -24,16 +25,11 @@ dependencies:
- jupyterlab>=3
- jupyterlab-lsp
- python-lsp-server
- matplotlib
- pygments
- mkdocs
- mkdocstrings
- mkdocs-material
# NOTE: we are installing mkdocs-jupyter with pip for now
# due to the following: https://github.com/conda-forge/mkdocs-jupyter-feedstock/issues/31
# - mkdocs-jupyter
- mkdocs-jupyter
- mkdocstrings-python
- ruff
- typing-extensions
- pip:
- mkdocs-jupyter>=0.24.7
22 changes: 20 additions & 2 deletions pyproject.toml
@@ -17,8 +17,17 @@ classifiers = [
"Topic :: Scientific/Engineering",
]
dependencies = [
# All core dependencies must be sourced from conda (conda-forge).
# See ``environment.yml`` for further information.
"deap",
"numpy",
"pydantic>=2.3",
"pyyaml",
"botorch>=0.9.2,<=0.10.0",
"scipy>=1.10.1",
"pandas",
"ipywidgets",
"tqdm",
"orjson",
"matplotlib"
]
description = "Flexible optimization of arbitrary problems in Python."
dynamic = [ "version" ]
@@ -31,6 +40,15 @@ requires-python = ">=3.9"
dev = [
"pytest",
"pytest-cov",
"ffmpeg",
"pytest",
"pytest-cov",
"jupyterlab>=3",
"jupyterlab-lsp",
"python-lsp-server",
"pygments",
"dask",
"mpi4py"
]
doc = [
"mkdocs",
2 changes: 1 addition & 1 deletion xopt/generators/bayesian/bax/visualize.py
@@ -103,7 +103,7 @@ def visualize_virtual_objective(
bounds = generator._get_optimization_bounds()
kwargs = kwargs if kwargs else {}
objective_values = generator.algorithm.evaluate_virtual_objective(
bax_model, x, bounds, tkwargs=generator._tkwargs, n_samples=n_samples, **kwargs
bax_model, x, bounds, tkwargs=generator.tkwargs, n_samples=n_samples, **kwargs
)

# get sample stats
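Note on the recurring rename in this commit: the private `_tkwargs` property becomes the public `tkwargs`, and every tensor the generators construct pulls its dtype/device from it. A minimal standalone sketch of the pattern (the dict values here are assumptions, not taken from this commit; the real property derives the device from the generator's `use_cuda` flag):

    import torch

    # tkwargs bundles tensor-construction settings shared across the generator
    tkwargs = {"dtype": torch.double, "device": torch.device("cpu")}
    x = torch.tensor([[0.1, 0.2]], **tkwargs)
    assert x.dtype == torch.double and x.device.type == "cpu"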
89 changes: 63 additions & 26 deletions xopt/generators/bayesian/bayesian_generator.py
@@ -4,13 +4,14 @@
import warnings
from abc import ABC, abstractmethod
from copy import deepcopy
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union

import pandas as pd
import torch
from botorch.acquisition import FixedFeatureAcquisitionFunction, qUpperConfidenceBound
from botorch.models.model import Model
from botorch.sampling import get_sampler
from botorch.utils.multi_objective import is_non_dominated
from botorch.utils.multi_objective.box_decompositions import DominatedPartitioning
from gpytorch import Module
from pydantic import Field, field_validator, PositiveInt, SerializeAsAny
@@ -384,7 +385,7 @@ def train_model(self, data: pd.DataFrame = None, update_internal=True) -> Module
self.vocs.output_names,
data,
{name: variable_bounds[name] for name in self.model_input_names},
**self._tkwargs,
**self.tkwargs,
)

if update_internal:
@@ -407,8 +408,18 @@ def propose_candidates(self, model, n_candidates=1):
# get acquisition function
acq_funct = self.get_acquisition(model)

# get candidates
candidates = self.numerical_optimizer.optimize(acq_funct, bounds, n_candidates)
# get initial candidates to start acquisition function optimization
initial_points = self._get_initial_conditions(n_candidates)

# get candidates -- grid optimizer does not support batch_initial_conditions
if isinstance(self.numerical_optimizer, GridOptimizer):
candidates = self.numerical_optimizer.optimize(
acq_funct, bounds, n_candidates
)
else:
candidates = self.numerical_optimizer.optimize(
acq_funct, bounds, n_candidates, batch_initial_conditions=initial_points
)
return candidates

def get_training_data(self, data: pd.DataFrame) -> pd.DataFrame:
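The branch above passes warm-start points only to optimizers that accept them. For reference, a self-contained sketch of how botorch's optimize_acqf (the kind of routine the non-grid optimizers wrap) consumes batch_initial_conditions; the model and data here are synthetic assumptions:

    import torch
    from botorch.acquisition import UpperConfidenceBound
    from botorch.models import SingleTaskGP
    from botorch.optim import optimize_acqf

    train_x = torch.rand(8, 2, dtype=torch.double)
    train_y = train_x.sum(dim=1, keepdim=True)
    model = SingleTaskGP(train_x, train_y)  # hyperparameter fitting skipped for brevity

    acq = UpperConfidenceBound(model, beta=2.0)
    bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double)
    init = train_x[-4:].unsqueeze(1)  # (num_restarts=4, q=1, d) warm-start points

    candidates, _ = optimize_acqf(
        acq, bounds, q=1, num_restarts=4, batch_initial_conditions=init
    )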
@@ -456,7 +467,7 @@ def get_input_data(self, data: pd.DataFrame) -> torch.Tensor:
input names (variables), and the resulting tensor is configured with the data
type and device settings from the generator.
"""
return torch.tensor(data[self.model_input_names].to_numpy(), **self._tkwargs)
return torch.tensor(data[self.model_input_names].to_numpy(), **self.tkwargs)

def get_acquisition(self, model):
"""
@@ -536,6 +547,11 @@ def visualize_model(self, **kwargs):
"""displays the GP models"""
return visualize_generator_model(self, **kwargs)

def _get_initial_conditions(self, n_candidates=1) -> Union[Tensor, None]:
"""overwrite if algorithm should specifiy initial candidates for optimizing
the acquisition function"""
return None

def _process_candidates(self, candidates: Tensor):
"""process pytorch candidates from optimizing the acquisition function"""
logger.debug(f"Best candidate from optimize {candidates}")
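The new `_get_initial_conditions` hook returns None by default. A hypothetical subclass (not part of this commit; other required methods omitted) could override it to seed the acquisition optimization with recent observations:

    from xopt.generators.bayesian.bayesian_generator import BayesianGenerator

    class WarmStartGenerator(BayesianGenerator):
        def _get_initial_conditions(self, n_candidates=1):
            if self.data is None or len(self.data) == 0:
                return None  # defer to the optimizer's default initialization
            # most recent inputs, reshaped to botorch's (num_restarts, q, d)
            return self.get_input_data(self.data)[-n_candidates:].unsqueeze(1)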
@@ -581,7 +597,7 @@ def _get_objective(self):

return self.custom_objective
else:
return create_mc_objective(self.vocs, self._tkwargs)
return create_mc_objective(self.vocs, self.tkwargs)

def _get_constraint_callables(self):
"""return constratint callable determined by vocs"""
@@ -591,7 +607,7 @@ def _get_constraint_callables(self):
return constraint_callables

@property
def _tkwargs(self):
def tkwargs(self):
# set device and data type for generator
device = "cpu"
if self.use_cuda:
@@ -627,7 +643,7 @@ def _candidate_names(self):

def _get_bounds(self):
"""convert bounds from vocs to torch tensors"""
return torch.tensor(self.vocs.bounds, **self._tkwargs)
return torch.tensor(self.vocs.bounds, **self.tkwargs)

def _get_optimization_bounds(self):
"""
@@ -720,16 +736,16 @@ def _get_max_travel_distances_region(self, bounds):
"from, add data first to use during BO"
)
last_point = torch.tensor(
self.data[self.vocs.variable_names].iloc[-1].to_numpy(), **self._tkwargs
self.data[self.vocs.variable_names].iloc[-1].to_numpy(), **self.tkwargs
)

# bound lengths based on vocs for normalization
lengths = self.vocs.bounds[1, :] - self.vocs.bounds[0, :]

# get maximum travel distances
max_travel_distances = torch.tensor(
self.max_travel_distances, **self._tkwargs
) * torch.tensor(lengths, **self._tkwargs)
self.max_travel_distances, **self.tkwargs
) * torch.tensor(lengths, **self.tkwargs)
max_travel_bounds = torch.stack(
(last_point - max_travel_distances, last_point + max_travel_distances)
)
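A toy worked example of the travel-limit region computed above (numbers assumed):

    import torch

    bounds = torch.tensor([[0.0, 0.0], [10.0, 4.0]])  # vocs bounds, shape (2, d)
    last_point = torch.tensor([5.0, 2.0])             # most recent observation
    max_travel = torch.tensor([0.1, 0.5]) * (bounds[1] - bounds[0])  # -> [1.0, 2.0]
    travel_bounds = torch.stack((last_point - max_travel, last_point + max_travel))
    print(travel_bounds)  # tensor([[4., 0.], [6., 4.]])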
@@ -774,33 +790,54 @@ def torch_reference_point(self):
supported"
)

return torch.tensor(pt, **self._tkwargs)
return torch.tensor(pt, **self.tkwargs)

def calculate_hypervolume(self):
"""compute hypervolume given data"""
objective_data = torch.tensor(
self.vocs.objective_data(self.data, return_raw=True).to_numpy()
def _get_scaled_data(self):
"""get scaled input/objective data for use with botorch logic which assumes
maximization for each objective"""
var_df, obj_df, _, _ = self.vocs.extract_data(
self.data, return_valid=True, return_raw=True
)

# hypervolume must only take into account feasible data points
if self.vocs.n_constraints > 0:
objective_data = objective_data[
self.vocs.feasibility_data(self.data)["feasible"].to_list()
]
variable_data = torch.tensor(var_df[self.vocs.variable_names].to_numpy())
objective_data = torch.tensor(obj_df[self.vocs.objective_names].to_numpy())
weights = set_botorch_weights(self.vocs).to(**self.tkwargs)[
: self.vocs.n_objectives
]
return variable_data, objective_data * weights, weights

n_objectives = self.vocs.n_objectives
weights = torch.zeros(n_objectives)
weights = set_botorch_weights(self.vocs).to(**self._tkwargs)
objective_data = objective_data * weights
def calculate_hypervolume(self):
"""compute hypervolume given data"""

# compute hypervolume
bd = DominatedPartitioning(
ref_point=self.torch_reference_point, Y=objective_data
ref_point=self.torch_reference_point, Y=self._get_scaled_data()[1]
)
volume = bd.compute_hypervolume().item()

return volume

def get_pareto_front(self):
"""compute the pareto front x/y values given data"""
variable_data, objective_data, weights = self._get_scaled_data()
obj_data = torch.vstack(
(self.torch_reference_point.unsqueeze(0), objective_data)
)
var_data = torch.vstack(
(
torch.full_like(variable_data[0], float("Nan")).unsqueeze(0),
variable_data,
)
)
non_dominated = is_non_dominated(obj_data)

# note: undo the weights to recover raw objective values
# only return values if non-NaN values exist
if torch.all(torch.isnan(var_data[non_dominated])):
return None, None
else:
return var_data[non_dominated], obj_data[non_dominated] / weights


def formatted_base_docstring():
return "\nBase Generator\n---------------\n" + BayesianGenerator.__doc__
8 changes: 3 additions & 5 deletions xopt/generators/bayesian/expected_improvement.py
@@ -40,7 +40,7 @@ def _get_acquisition(self, model):
# analytic acquisition function for single candidate generation with
# basic objective
# note that the analytic version cannot handle custom objectives
weights = set_botorch_weights(self.vocs).to(**self._tkwargs)
weights = set_botorch_weights(self.vocs).to(**self.tkwargs)
posterior_transform = ScalarizedPosteriorTransform(weights)
acq = ExpectedImprovement(
model, best_f=best_f, posterior_transform=posterior_transform
@@ -52,14 +52,12 @@ def _get_best_f(self, data, objective):
"""get best function value for EI based on the objective"""
if isinstance(objective, CustomXoptObjective):
best_f = objective(
torch.tensor(
self.vocs.observable_data(data).to_numpy(), **self._tkwargs
)
torch.tensor(self.vocs.observable_data(data).to_numpy(), **self.tkwargs)
).max()
else:
# analytic acquisition function for single candidate generation
best_f = -torch.tensor(
self.vocs.objective_data(data).min().values, **self._tkwargs
self.vocs.objective_data(data).min().values, **self.tkwargs
)

return best_f
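The sign flips in _get_best_f come from converting Xopt's objective convention into botorch's maximization frame. A self-contained sketch of the resulting acquisition setup (synthetic model and data; a single MINIMIZE objective is assumed):

    import torch
    from botorch.acquisition import ExpectedImprovement
    from botorch.acquisition.objective import ScalarizedPosteriorTransform
    from botorch.models import SingleTaskGP

    train_x = torch.rand(6, 1, dtype=torch.double)
    train_y = (train_x - 0.5) ** 2  # objective values to MINIMIZE
    model = SingleTaskGP(train_x, train_y)

    weights = torch.tensor([-1.0], dtype=torch.double)  # -1 turns min into max
    best_f = -train_y.min()                             # best value in the max frame
    acq = ExpectedImprovement(
        model, best_f=best_f, posterior_transform=ScalarizedPosteriorTransform(weights)
    )
    value = acq(torch.tensor([[[0.5]]], dtype=torch.double))  # evaluate at x = 0.5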
2 changes: 1 addition & 1 deletion xopt/generators/bayesian/mggpo.py
@@ -32,7 +32,7 @@ def propose_candidates(self, model, n_candidates=1):
ga_candidates = self.ga_generator.generate(n_candidates * 10)
ga_candidates = pd.DataFrame(ga_candidates)[self.vocs.variable_names].to_numpy()
ga_candidates = torch.unique(
torch.tensor(ga_candidates, **self._tkwargs), dim=0
torch.tensor(ga_candidates, **self.tkwargs), dim=0
).reshape(-1, 1, self.vocs.n_variables)

if ga_candidates.shape[0] < n_candidates:
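For reference, the dedup-and-reshape step above, run on synthetic GA proposals:

    import torch

    ga_candidates = torch.tensor([[0.1, 0.2], [0.1, 0.2], [0.3, 0.4]])  # one duplicate
    unique = torch.unique(ga_candidates, dim=0).reshape(-1, 1, 2)
    print(unique.shape)  # torch.Size([2, 1, 2]) -- (n_unique, q=1, n_variables)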
(Diff truncated; the remaining changed files are not shown.)
