From c895a8d56efdd09ddd905768e9cc6e0cac55c42c Mon Sep 17 00:00:00 2001 From: Sait Cakmak Date: Fri, 13 Sep 2024 05:53:14 -0700 Subject: [PATCH] Apply input transforms when computing MLL in model closures (#2527) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/2527 During model training, the input transforms are applied in `model.forward`. While evaluating the model closures, we pass in the train inputs to the `mll`, which passed them down to the `likelihood`. If we don't transform the inputs before passing them into `mll`, we end up evaluating `model.forward` and `likelihood` using different inputs. This is not an issue during the `posterior` evaluation, since the transforms are applied in `model.posterior` before being passed to `model.__call__` and `likelihood`. This diff updates the model closures to transform the inputs before passing them into `mll`. Fixes https://github.com/pytorch/botorch/issues/2515 Reviewed By: SebastianAment Differential Revision: D62497392 fbshipit-source-id: 9850c6529eea336589c2e2bbae400a4a9dc87f12 --- botorch/models/gp_regression.py | 9 +- botorch/optim/closures/model_closures.py | 27 +++-- test/optim/closures/test_model_closures.py | 120 ++++++++++++++++----- 3 files changed, 122 insertions(+), 34 deletions(-) diff --git a/botorch/models/gp_regression.py b/botorch/models/gp_regression.py index 9c7f236549..4ffcbcfb15 100644 --- a/botorch/models/gp_regression.py +++ b/botorch/models/gp_regression.py @@ -345,12 +345,17 @@ def __init__( MIN_INFERRED_NOISE_LEVEL, transform=None, initial_value=1.0 ), ) + # Likelihood will always get evaluated with transformed X, so we need to + # transform the training data before constructing the noise model. + with torch.no_grad(): + transformed_X = self.transform_inputs( + X=train_X, input_transform=input_transform + ) noise_model = SingleTaskGP( - train_X=train_X, + train_X=transformed_X, train_Y=train_Yvar, likelihood=noise_likelihood, outcome_transform=Log(), - input_transform=input_transform, ) likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_model)) # This is hacky -- this class used to inherit from SingleTaskGP, but it diff --git a/botorch/optim/closures/model_closures.py b/botorch/optim/closures/model_closures.py index e511d8b958..c3fa36091b 100644 --- a/botorch/optim/closures/model_closures.py +++ b/botorch/optim/closures/model_closures.py @@ -9,7 +9,6 @@ from __future__ import annotations from collections.abc import Sequence - from itertools import chain, repeat from types import NoneType from typing import Any, Callable, Optional @@ -174,9 +173,17 @@ def _get_loss_closure_exact_internal( r"""ExactMarginalLogLikelihood loss closure with internally managed data.""" def closure(**kwargs: Any) -> Tensor: - model_output = mll.model(*mll.model.train_inputs) + model = mll.model + # The inputs will get transformed in forward here. + model_output = model(*model.train_inputs) log_likelihood = mll( - model_output, mll.model.train_targets, *mll.model.train_inputs, **kwargs + model_output, + model.train_targets, + # During model training, the model inputs get transformed in the forward + # pass. The train_inputs property is not transformed yet, so we need to + # transform it before passing it to the likelihood for consistency. + *(model.transform_inputs(X=t_in) for t_in in model.train_inputs), + **kwargs, ) return -log_likelihood @@ -190,11 +197,19 @@ def _get_loss_closure_sum_internal( r"""SumMarginalLogLikelihood loss closure with internally managed data.""" def closure(**kwargs: Any) -> Tensor: - model_output = mll.model(*mll.model.train_inputs) + model = mll.model + # The inputs will get transformed in forward here. + model_output = model(*model.train_inputs) log_likelihood = mll( model_output, - mll.model.train_targets, - *map(list, mll.model.train_inputs), + model.train_targets, + # During model training, the model inputs get transformed in the forward + # pass. The train_inputs property is not transformed yet, so we need to + # transform it before passing it to the likelihood for consistency. + *( + (model.transform_inputs(X=t_in) for t_in in sub_t_in) + for sub_t_in in model.train_inputs + ), **kwargs, ) return -log_likelihood diff --git a/test/optim/closures/test_model_closures.py b/test/optim/closures/test_model_closures.py index 0d483bd819..13e8a1bdd6 100644 --- a/test/optim/closures/test_model_closures.py +++ b/test/optim/closures/test_model_closures.py @@ -17,35 +17,68 @@ ) from botorch.utils.testing import BotorchTestCase from gpytorch import settings as gpytorch_settings +from gpytorch.likelihoods.gaussian_likelihood import GaussianLikelihood from gpytorch.mlls import ExactMarginalLogLikelihood, SumMarginalLogLikelihood +from gpytorch.mlls.marginal_log_likelihood import MarginalLogLikelihood +from gpytorch.module import Module +from torch import Tensor from torch.utils.data import DataLoader, TensorDataset +# Mock wrapping the __call__ directly is leading to errors like +# TypeError: super(type, obj): obj must be an instance or subtype of type +# so, doing this manually here. +class WrapperLikelihood(GaussianLikelihood): + def __init__(self, base_likelihood: GaussianLikelihood): + """A wrapper around a GaussianLikelihood that stores the call args.""" + Module.__init__(self) + self.base_likelihood = base_likelihood + self.call_args = [] + + def __call__(self, *args, **kwargs): + # Store the train inputs arg for testing. + self.call_args.append(args[1]) + return self.base_likelihood(*args, **kwargs) + + +def _get_mlls( + device: torch.device, wrap_likelihood: bool = False +) -> tuple[Tensor, list[MarginalLogLikelihood]]: + """Returns the train X, along two MLLs: one for a SingleTaskGP and + one for a ModelListGP. + + Args: + device: The device to use. + wrap_likelihood: If True, wrap the likelihood in a WrapperLikelihood. + This is useful for comparing call args later. + """ + with torch.random.fork_rng(): + torch.manual_seed(0) + # Inputs are not in the unit cube to ensure input transform is applied. + train_X = torch.linspace(0, 5, 10).unsqueeze(-1) + train_Y = torch.sin((2 * pi) * train_X) + train_Y = train_Y + 0.1 * torch.randn_like(train_Y) + mlls = [] + model = SingleTaskGP( + train_X=train_X, + train_Y=train_Y, + input_transform=Normalize(d=1), + outcome_transform=Standardize(m=1), + ) + if wrap_likelihood: + model.likelihood = WrapperLikelihood(model.likelihood) + mll = ExactMarginalLogLikelihood(model.likelihood, model) + mlls.append(mll.to(device=device, dtype=torch.double)) + + model = ModelListGP(model, model) + mll = SumMarginalLogLikelihood(model.likelihood, model) + mlls.append(mll.to(device=device, dtype=torch.double)) + return train_X.to(device=device, dtype=torch.double), mlls + + class TestLossClosures(BotorchTestCase): - def setUp(self): - super().setUp() - with torch.random.fork_rng(): - torch.manual_seed(0) - train_X = torch.linspace(0, 1, 10).unsqueeze(-1) - train_Y = torch.sin((2 * pi) * train_X) - train_Y = train_Y + 0.1 * torch.randn_like(train_Y) - - self.mlls = {} - model = SingleTaskGP( - train_X=train_X, - train_Y=train_Y, - input_transform=Normalize(d=1), - outcome_transform=Standardize(m=1), - ) - mll = ExactMarginalLogLikelihood(model.likelihood, model) - self.mlls[type(mll), type(model.likelihood), type(model)] = mll.to(self.device) - - model = ModelListGP(model, model) - mll = SumMarginalLogLikelihood(model.likelihood, model) - self.mlls[type(mll), type(model.likelihood), type(model)] = mll.to(self.device) - - def test_main(self): - for mll in self.mlls.values(): + def test_main(self) -> None: + for mll in _get_mlls(device=self.device)[1]: out = mll.model(*mll.model.train_inputs) loss = -mll(out, mll.model.train_targets).sum() loss.backward() @@ -63,8 +96,8 @@ def test_main(self): self.assertTrue(loss.equal(_loss)) self.assertTrue(all(a.equal(b) for a, b in zip_longest(grads, _grads))) - def test_data_loader(self): - for mll in self.mlls.values(): + def test_data_loader(self) -> None: + for mll in _get_mlls(device=self.device)[1]: if type(mll) is not ExactMarginalLogLikelihood: continue @@ -86,3 +119,38 @@ def test_data_loader(self): closure = get_loss_closure_with_grads(mll, params, data_loader=loader) with self.assertRaisesRegex(TypeError, "Expected .* a batch of tensors"): closure() + + def test_with_input_transforms(self) -> None: + # This test reproduces the bug reported in issue #2515. + train_X, mlls = _get_mlls(device=self.device, wrap_likelihood=True) + for mll in mlls: + if isinstance(mll, SumMarginalLogLikelihood): + # The likelihood is called twice here since it is the same + # likelihood in both child models. + likelihood = mll.model.models[0].likelihood + expected_calls1 = 2 # In the closure call. + expected_calls2 = 6 # Closure + posterior calls. + else: + likelihood = mll.model.likelihood + expected_calls1 = 1 # In the closure call. + expected_calls2 = 4 # Closure + posterior calls. + likelihood.call_args = [] # reset since it is shared between the models. + params = {n: p for n, p in mll.named_parameters() if p.requires_grad} + # Evaluate the closure to mimic the model fitting process. + mll.train() + closure = get_loss_closure_with_grads(mll, params) + closure() + self.assertEqual(len(likelihood.call_args), expected_calls1) + # Call the model posterior to reproduce post-fitting usage. + mll.model.posterior(train_X, observation_noise=True) + # Compare the call args to ensure they're all the same. + # Likelihood is called twice on model(X) and once for adding the noise. + self.assertEqual(len(likelihood.call_args), expected_calls2) + arg0 = likelihood.call_args[0] + for i in range(1, expected_calls2): + argi = likelihood.call_args[i] + # The arg may be a tensor or a single element list of the tensor. + self.assertAllClose( + arg0 if isinstance(arg0, Tensor) else arg0[0], + argi if isinstance(argi, Tensor) else argi[0], + )