From c89583ee668a220e808f8d7225bf5b133aafc668 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Thu, 21 Sep 2023 18:22:23 +0100 Subject: [PATCH 01/33] Add support for local models and datasets (WIP) --- docs/notebooks/trust_region.pct.py | 15 ++- trieste/acquisition/rule.py | 145 +++++++++++++++++++++++------ trieste/acquisition/utils.py | 23 ++++- trieste/bayesian_optimizer.py | 60 +++++++++--- trieste/objectives/utils.py | 53 ++++++++++- trieste/utils/misc.py | 21 +++-- 6 files changed, 256 insertions(+), 61 deletions(-) diff --git a/docs/notebooks/trust_region.pct.py b/docs/notebooks/trust_region.pct.py index c55a80fe28..0915ef22bf 100644 --- a/docs/notebooks/trust_region.pct.py +++ b/docs/notebooks/trust_region.pct.py @@ -199,15 +199,15 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: # Note: the number of sub-spaces/regions must match the number of batch query points. # %% -num_query_points = 5 +num_query_points = 6 init_subspaces = [ - trieste.acquisition.rule.SingleObjectiveTrustRegionBox(search_space) - for _ in range(num_query_points) + trieste.acquisition.rule.SingleObjectiveTrustRegionBox(search_space, i) + for i in range(num_query_points) ] base_rule = trieste.acquisition.rule.EfficientGlobalOptimization( # type: ignore[var-annotated] builder=trieste.acquisition.ParallelContinuousThompsonSampling(), - num_query_points=num_query_points, + #num_query_points=num_query_points, ) batch_acq_rule = trieste.acquisition.rule.BatchTrustRegionBox( init_subspaces, base_rule @@ -226,7 +226,12 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: num_steps = 5 result = bo.optimize( - num_steps, initial_data, build_model(), batch_acq_rule, track_state=True + #num_steps, initial_data, build_model(), batch_acq_rule, track_state=True + num_steps, + initial_data, + trieste.acquisition.utils.copy_to_local_models(build_model(), 2), + batch_acq_rule, + track_state=True, ) dataset = result.try_get_final_dataset() diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 50acaa21b2..37a5c89252 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -972,6 +972,30 @@ def update( """ ... + def select_model( + self, models: Optional[Mapping[Tag, ProbabilisticModelType]] + ) -> Optional[ProbabilisticModelType]: + """ + Select a single model belonging to this region. This is an optional method that is + only required if the region is used with single model acquisition functions. + + :param models: The model for each tag. + :return: The model belonging to this region. + """ + # By default return the OBJECTIVE model. + return get_value_for_tag(models, OBJECTIVE) + + def select_dataset(self, datasets: Optional[Mapping[Tag, Dataset]]) -> Optional[Dataset]: + """ + Select a single dataset belonging to this region. This is an optional method that is + only required if the region is used with single model acquisition functions. + + :param datasets: The datasets for each tag. + :return: The dataset belonging to this region. + """ + # By default return the OBJECTIVE dataset. + return get_value_for_tag(datasets, OBJECTIVE) + UpdatableTrustRegionType = TypeVar("UpdatableTrustRegionType", bound=UpdatableTrustRegion) """ A type variable bound to :class:`UpdatableTrustRegion`. """ @@ -1055,10 +1079,21 @@ def acquire( points from the previous acquisition state. """ + # Subspaces should be set by the time we call `acquire`. 
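+        # Tags of the form f"{OBJECTIVE}__{i}" are assumed to denote per-region (local)
+        # models, while a bare OBJECTIVE tag denotes a single global model (see
+        # `copy_to_local_models`); the checks below rely on this convention.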
+ assert self._tags is not None + assert self._init_subspaces is not None + + num_subspaces = len(self._tags) + num_objective_models = len([tag for tag in models if tag.split("__")[0] == OBJECTIVE]) + assert num_subspaces % num_objective_models == 0, ( + f"The number of subspaces {num_subspaces} should be a multiple of the number of " + f"objective models {num_objective_models}" + ) + def state_func( state: BatchTrustRegion.State | None, ) -> Tuple[BatchTrustRegion.State | None, TensorType]: - # Subspaces should be set by the time we call `acquire`. + # Check again to keep mypy happy. assert self._tags is not None assert self._init_subspaces is not None @@ -1092,9 +1127,35 @@ def state_func( acquisition_space = state.acquisition_space state_ = BatchTrustRegion.State(acquisition_space) - points = self._rule.acquire(acquisition_space, models, datasets=datasets) - return state_, points + # If the base rule is a single model acquisition rule, but we have multiple models, + # run the base rule sequentially for each subspace. + # Otherwise, run the base rule once with all models and datasets. + if isinstance(self._rule, EfficientGlobalOptimization) and hasattr( + self._rule._builder, "single_builder" + ) and (len(models) > 1 or OBJECTIVE not in models): + points = [] + #for tag, model in models.items(): + # global_tag, index_tag = tag.split("__") + # tags = [tag, global_tag] # Prefer local dataset if available. + # dataset = get_value_for_tag(datasets, tags) + for subspace in subspaces: + model = subspace.select_model(models) + dataset = subspace.select_dataset(datasets) + points.append( + self._rule.acquire( + subspace, + # Using default tag, as that is what single model acquisition builders + # expect. + {OBJECTIVE: model}, + {OBJECTIVE: dataset}, + ) + ) + points = tf.concat(points, axis=0) + else: + points = self._rule.acquire(acquisition_space, models, datasets=datasets) + + return state_, tf.reshape(points, [-1, len(subspaces), points.shape[-1]]) return state_func @@ -1150,6 +1211,7 @@ class SingleObjectiveTrustRegionBox(Box, UpdatableTrustRegion): def __init__( self, global_search_space: SearchSpace, + index: Optional[int] = None, beta: float = 0.7, kappa: float = 1e-4, min_eps: float = 1e-2, @@ -1166,6 +1228,7 @@ def __init__( """ self._global_search_space = global_search_space + self._index = index self._beta = beta self._kappa = kappa self._min_eps = min_eps @@ -1199,13 +1262,13 @@ def initialize( Initialize the box by sampling a location from the global search space and setting the bounds. """ - dataset = get_value_for_tag(datasets) + dataset = self.select_dataset(datasets) self.location = tf.squeeze(self.global_search_space.sample(1), axis=0) self._step_is_success = False self._init_eps() self._update_bounds() - _, self._y_min = self.get_local_min(dataset) + _, self._y_min = self.get_dataset_min(dataset) def update( self, @@ -1223,13 +1286,13 @@ def update( ``1 / beta``. Conversely, if it was unsuccessful, the size is reduced by the factor ``beta``. 
""" - dataset = get_value_for_tag(datasets) + dataset = self.select_dataset(datasets) if tf.reduce_any(self.eps < self._min_eps): self.initialize(models, datasets) return - x_min, y_min = self.get_local_min(dataset) + x_min, y_min = self.get_dataset_min(dataset) self.location = x_min tr_volume = tf.reduce_prod(self.upper - self.lower) @@ -1238,24 +1301,55 @@ def update( self._update_bounds() self._y_min = y_min + def select_model( + self, models: Optional[Mapping[Tag, ProbabilisticModelType]] + ) -> Optional[ProbabilisticModelType]: + # Select the model belonging to this box. Note there isn't necessarily a one-to-one + # mapping between regions and models. + if self._index is None: + tags = OBJECTIVE # If no index, then pick the global dataset. + else: + num_objective_models = len([tag for tag in models if tag.split("__")[0] == OBJECTIVE]) + index = self._index % num_objective_models + tags = [f"{OBJECTIVE}__{index}", OBJECTIVE] # Prefer local dataset if available. + return get_value_for_tag(models, tags) + + def select_dataset(self, datasets: Optional[Mapping[Tag, Dataset]]) -> Optional[Dataset]: + # Select the dataset belonging to this box. Note there isn't necessarily a one-to-one + # mapping between regions and datasets. + # There are two possible ways local datasets can + # be stored; either with a specific tag, or as a specific slice of the global dataset. + if self._index is None: + tags = OBJECTIVE # If no index, then pick the global dataset. + else: + num_objective_datasets = len([tag for tag in datasets if tag.split("__")[0] == OBJECTIVE]) + index = self._index % num_objective_datasets + tags = [f"{OBJECTIVE}__{index}", OBJECTIVE] # Prefer local dataset if available. + return get_value_for_tag(datasets, tags) + + ## If dataset is greater than rank 2, dimension 1 is the batch dimension. Select the slice + ## corresponding to this box. + ## Note: it is possible that this is already a local dataset from the previous step. However, + ## to keep the code general, we make no distinction between local and global datasets here; + ## as this is not an expected use case. + #if self._index is not None and dataset is not None and tf.rank(dataset.observations) > 2: + # dataset = Dataset(dataset.query_points[:, self._index, ...], dataset.observations[:, self._index, ...]) + #return dataset + @check_shapes( "return[0]: [D]", "return[1]: []", ) - def get_local_min(self, dataset: Optional[Dataset]) -> Tuple[TensorType, TensorType]: - """Calculate the local minimum of the box using the given dataset.""" + def get_dataset_min(self, dataset: Optional[Dataset]) -> Tuple[TensorType, TensorType]: + """Calculate the minimum of the box using the given dataset.""" if dataset is None: raise ValueError("""dataset must be provided""") - in_tr = self.contains(dataset.query_points) - in_tr_obs = tf.where( - tf.expand_dims(in_tr, axis=-1), - dataset.observations, - tf.constant(np.inf, dtype=dataset.observations.dtype), - ) - ix = tf.argmin(in_tr_obs) + # Note the behaviour here depends on the dataset passed in, which could be the global + # dataset or the local dataset. 
+ ix = tf.argmin(dataset.observations) x_min = tf.gather(dataset.query_points, ix) - y_min = tf.gather(in_tr_obs, ix) + y_min = tf.gather(dataset.observations, ix) return tf.squeeze(x_min, axis=0), tf.squeeze(y_min) @@ -1284,7 +1378,7 @@ def acquire( num_query_points = 1 self._init_subspaces = tuple( - [SingleObjectiveTrustRegionBox(search_space) for _ in range(num_query_points)] + [SingleObjectiveTrustRegionBox(search_space, i) for i in range(num_query_points)] ) self._tags = tuple([str(index) for index in range(len(self._init_subspaces))]) @@ -1336,11 +1430,12 @@ class TREGOBox(SingleObjectiveTrustRegionBox): def __init__( self, global_search_space: SearchSpace, + index: Optional[int] = None, beta: float = 0.7, kappa: float = 1e-4, min_eps: float = 1e-2, ): - super().__init__(global_search_space, beta, kappa, min_eps) + super().__init__(global_search_space, index, beta, kappa, min_eps) self._is_global = False self._initialized = False @@ -1382,18 +1477,6 @@ def initialize( super().initialize(models, datasets) - @inherit_check_shapes - def get_local_min(self, dataset: Optional[Dataset]) -> Tuple[TensorType, TensorType]: - if dataset is None: - raise ValueError("""dataset must be provided""") - - # Always return the global minimum. - ix = tf.argmin(dataset.observations) - x_min = tf.gather(dataset.query_points, ix) - y_min = tf.gather(dataset.observations, ix) - - return tf.squeeze(x_min, axis=0), tf.squeeze(y_min) - class TURBO( AcquisitionRule[ diff --git a/trieste/acquisition/utils.py b/trieste/acquisition/utils.py index 590b0bb416..602afebc77 100644 --- a/trieste/acquisition/utils.py +++ b/trieste/acquisition/utils.py @@ -11,15 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import copy import functools -from typing import Tuple, Union +from typing import Mapping, Tuple, Union import tensorflow as tf from check_shapes import check_shapes from ..data import Dataset +from ..models.interfaces import ProbabilisticModel +from ..observer import OBJECTIVE from ..space import SearchSpaceType -from ..types import TensorType +from ..types import Tag, TensorType from .interface import AcquisitionFunction from .optimizer import AcquisitionOptimizer @@ -138,6 +141,22 @@ def get_local_dataset(local_space: SearchSpaceType, dataset: Dataset) -> Dataset return local_dataset +def copy_to_local_models( + global_model: ProbabilisticModel, + num_local_models: int, + key: Tag = OBJECTIVE, +) -> Mapping[Tag, ProbabilisticModel]: + """ + Helper method to copy a global model to local models. + + :param global_model: The global model. + :param num_local_models: The number of local models to create. + :param key: The tag prefix for the local models. + :return: A mapping of the local models. 
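+
+    Example (a minimal sketch; assumes ``model`` is an already-built probabilistic model):
+
+        >>> local_models = copy_to_local_models(model, 3)  # doctest: +SKIP
+        >>> sorted(local_models)  # doctest: +SKIP
+        ['OBJECTIVE__0', 'OBJECTIVE__1', 'OBJECTIVE__2']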
+ """ + return {f"{key}__{i}": copy.deepcopy(global_model) for i in range(num_local_models)} + + @check_shapes( "points: [n_points, ...]", "return: [n_points]", diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index 2d8f3833ab..7c3a01afbe 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -57,10 +57,12 @@ from .acquisition.rule import TURBO, AcquisitionRule, EfficientGlobalOptimization from .data import Dataset from .models import SupportsCovarianceWithTopFidelity, TrainableProbabilisticModel +from .objectives.utils import mk_batch_observer from .observer import OBJECTIVE, Observer from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Err, Ok, Result, Timer +from .utils.misc import get_value_for_tag StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -96,18 +98,22 @@ class Record(Generic[StateType]): @property def dataset(self) -> Dataset: """The dataset when there is just one dataset.""" - if len(self.datasets) == 1: - return next(iter(self.datasets.values())) + # Ignore local datasets. + datasets = dict(filter(lambda item: "__" not in item[0], self.datasets.items())) + if len(datasets) == 1: + return next(iter(datasets.values())) else: - raise ValueError(f"Expected a single dataset, found {len(self.datasets)}") + raise ValueError(f"Expected a single dataset, found {len(datasets)}") @property def model(self) -> TrainableProbabilisticModel: """The model when there is just one dataset.""" - if len(self.models) == 1: - return next(iter(self.models.values())) + # Ignore local models. + models = dict(filter(lambda item: "__" not in item[0], self.models.items())) + if len(models) == 1: + return next(iter(models.values())) else: - raise ValueError(f"Expected a single model, found {len(self.models)}") + raise ValueError(f"Expected a single model, found {len(models)}") def save(self, path: Path | str) -> FrozenRecord[StateType]: """Save the record to disk. Will overwrite any existing file at the same path.""" @@ -226,6 +232,8 @@ def try_get_final_dataset(self) -> Dataset: :raise ValueError: If the optimization was not a single dataset run. """ datasets = self.try_get_final_datasets() + # Ignore local datasets. + datasets = dict(filter(lambda item: "__" not in item[0], datasets.items())) if len(datasets) == 1: return next(iter(datasets.values())) else: @@ -269,6 +277,8 @@ def try_get_final_model(self) -> TrainableProbabilisticModel: :raise ValueError: If the optimization was not a single model run. """ models = self.try_get_final_models() + # Ignore local models. + models = dict(filter(lambda item: "__" not in item[0], models.items())) if len(models) == 1: return next(iter(models.values())) else: @@ -627,6 +637,7 @@ def optimize( """ if isinstance(datasets, Dataset): datasets = {OBJECTIVE: datasets} + if not isinstance(models, Mapping): models = {OBJECTIVE: models} # type: ignore[dict-item] # reassure the type checker that everything is tagged @@ -636,10 +647,13 @@ def optimize( if num_steps < 0: raise ValueError(f"num_steps must be at least 0, got {num_steps}") - if datasets.keys() != models.keys(): + # Get set of dataset keys, ignoring suffix starting with double underscore. + datasets_keys = {tag.split("__")[0] for tag in datasets.keys()} + models_keys = {tag.split("__")[0] for tag in models.keys()} + if datasets_keys != models_keys: raise ValueError( - f"datasets and models should contain the same keys. Got {datasets.keys()} and" - f" {models.keys()} respectively." 
+ f"datasets and models should contain the same keys. Got {datasets_keys} and" + f" {models_keys} respectively." ) if not datasets: @@ -717,7 +731,8 @@ def optimize( if step == 1 and fit_model and fit_initial_model: with Timer() as initial_model_fitting_timer: for tag, model in models.items(): - dataset = datasets[tag] + tags = [tag, tag.split("__")[0]] # Prefer local dataset if available. + dataset = get_value_for_tag(datasets, tags) model.update(dataset) model.optimize_and_save_result(dataset) if summary_writer: @@ -738,7 +753,14 @@ def optimize( else: query_points = points_or_stateful - observer_output = self._observer(query_points) + observer = self._observer + # If query_points are rank 3, then use a batched observer. + if tf.rank(query_points) == 3: + num_objective_models = len( + [tag for tag in models if tag.split("__")[0] == OBJECTIVE] + ) + observer = mk_batch_observer(observer, num_objective_models, OBJECTIVE) + observer_output = observer(query_points) tagged_output = ( observer_output @@ -746,11 +768,23 @@ def optimize( else {OBJECTIVE: observer_output} ) - datasets = {tag: datasets[tag] + tagged_output[tag] for tag in tagged_output} + # Account for the case where there may be an initial dataset that is not tagged + # per region. In this case, only the global dataset will exist in datasets. We + # want to copy this initial dataset to all the regions. + # + # If a tag from tagged_output does not exist in datasets, then add it to + # datasets by copying the dataset from datasets with the same tag-prefix. + # Otherwise keep the existing dataset from datasets. + datasets = { + tag: get_value_for_tag(datasets, [tag, tag.split("__")[0]]) + tagged_output[tag] + for tag in tagged_output + } + with Timer() as model_fitting_timer: if fit_model: for tag, model in models.items(): - dataset = datasets[tag] + tags = [tag, tag.split("__")[0]] # Prefer local dataset if available. + dataset = get_value_for_tag(datasets, tags) model.update(dataset) model.optimize_and_save_result(dataset) diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index 31e70a52eb..69714c8686 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -18,12 +18,13 @@ """ from __future__ import annotations +import tensorflow as tf from collections.abc import Callable -from typing import Optional, overload +from typing import Mapping, Optional, Union, overload from ..data import Dataset -from ..observer import MultiObserver, Observer, SingleObserver +from ..observer import OBJECTIVE, MultiObserver, Observer, SingleObserver from ..types import Tag, TensorType @@ -57,3 +58,51 @@ def mk_multi_observer(**kwargs: Callable[[TensorType], TensorType]) -> MultiObse :return: An multi-observer returning the data from ``kwargs``. """ return lambda qp: {key: Dataset(qp, objective(qp)) for key, objective in kwargs.items()} + + +def mk_batch_observer( + objective_or_observer: Union[Callable[[TensorType], TensorType], SingleObserver], + batch_size: int, + key: Optional[Tag] = None, +) -> Observer: + """ + Create an observer that returns the data from ``objective`` or an existing ``observer`` + separately for each query point in a batch. + + :param objective_or_observer: An objective or an existing observer designed to be used with a + single data set and model. + :param batch_size: The batch size of the observer. + :param key: An optional key to use to access the data from the observer result. 
+ :return: A multi-observer across the batch dimension of query points, returning the data from + ``objective``. If ``key`` is provided, the observer will be a mapping. Otherwise, it will + return a single dataset. + """ + + def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: + assert tf.rank(qps) == 3, ( + f"query points must be rank 3 for batch observer, got {tf.rank(qps)}" + ) + + # Call objective with rank 2 query points by flattening batch dimension. + # Some objectives might only expect rank 2 query points, so this is safer. + qps = tf.reshape(qps, [-1, qps.shape[-1]]) + obs_or_dataset = objective_or_observer(qps) + + if not isinstance(obs_or_dataset, Dataset): + obs_or_dataset = Dataset(qps, obs_or_dataset) + + if key is None: + # Always use rank 2 shape as models (e.g. GPR) expect this, so return as is. + return obs_or_dataset + else: + # Include overall dataset and per batch dataset. + obs = obs_or_dataset.observations + qps = tf.reshape(qps, [-1, batch_size, qps.shape[-1]]) + obs = tf.reshape(obs, [-1, batch_size, obs.shape[-1]]) + datasets = { + OBJECTIVE: obs_or_dataset, + **{f"{key}__{i}": Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)} + } + return datasets + + return _observer diff --git a/trieste/utils/misc.py b/trieste/utils/misc.py index c29b3c4e51..a42a9c114c 100644 --- a/trieste/utils/misc.py +++ b/trieste/utils/misc.py @@ -16,7 +16,7 @@ from abc import ABC, abstractmethod from time import perf_counter from types import TracebackType -from typing import Any, Callable, Generic, Mapping, NoReturn, Optional, Tuple, Type, TypeVar +from typing import Any, Callable, Generic, Mapping, NoReturn, Optional, Sequence, Tuple, Type, TypeVar, Union import numpy as np import tensorflow as tf @@ -220,21 +220,26 @@ def map_values(f: Callable[[U], V], mapping: Mapping[K, U]) -> Mapping[K, V]: """ An unbound type variable. """ -def get_value_for_tag(mapping: Optional[Mapping[Tag, T]], tag: Tag = OBJECTIVE) -> Optional[T]: - """Return the value of a tag in a mapping. +def get_value_for_tag( + mapping: Optional[Mapping[Tag, T]], tags: Union[Tag, Sequence[Tag]] = OBJECTIVE +) -> Optional[T]: + """Return the value from a mapping for the first tag found from a sequence of tags. :param mapping: A mapping from tags to values. - :param tag: A tag. + :param tags: A tag or a sequence of tags. Sequence is searched in order. :return: The value of the tag in the mapping, or None if the mapping is None. - :raises ValueError: If the tag is not in the mapping and the mapping is not None. + :raises ValueError: If none of the tags are in the mapping and the mapping is not None. 
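+
+    Example (illustrative; earlier tags in the sequence take precedence):
+
+        >>> get_value_for_tag({"OBJECTIVE": 1, "OBJECTIVE__0": 2}, ["OBJECTIVE__0", "OBJECTIVE"])
+        2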
""" + if isinstance(tags, Tag): + tags = [tags] + if mapping is None: return None - elif tag in mapping.keys(): - return mapping[tag] + elif matched_tags := sorted(set(tags) & set(mapping.keys()), key = tags.index): + return mapping[matched_tags[0]] else: - raise ValueError(f"tag '{tag}' not found in mapping") + raise ValueError(f"none of the tags '{tags}' found in mapping") class Timer: From c8aebec777ba8719cd4fc59959b7fab2b6dbf787 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Mon, 25 Sep 2023 11:47:35 +0100 Subject: [PATCH 02/33] Add unit test for local models (WIP) --- tests/unit/acquisition/test_rule.py | 77 ++++++++++++++++++++++++++++- trieste/acquisition/rule.py | 27 +++------- 2 files changed, 81 insertions(+), 23 deletions(-) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index 3bee1fd575..c1b28b694c 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -32,6 +32,7 @@ from trieste.acquisition import ( AcquisitionFunction, AcquisitionFunctionBuilder, + MultipleOptimismNegativeLowerConfidenceBound, NegativeLowerConfidenceBound, ParallelContinuousThompsonSampling, SingleModelAcquisitionBuilder, @@ -1444,12 +1445,15 @@ def __init__( self, fixed_location: TensorType, global_search_space: SearchSpace, + index: Optional[int] = None, beta: float = 0.7, kappa: float = 1e-4, min_eps: float = 1e-2, + init_eps: float = 0.07, ): - super().__init__(global_search_space, beta, kappa, min_eps) + super().__init__(global_search_space, index, beta, kappa, min_eps) self._location = fixed_location + self._init_eps_val = init_eps @property def location(self) -> TensorType: @@ -1460,7 +1464,7 @@ def location(self, location: TensorType) -> None: ... def _init_eps(self) -> None: - self.eps = tf.constant(0.07, dtype=tf.float64) + self.eps = tf.constant(self._init_eps_val, dtype=tf.float64) # Start with a defined state and dataset. Acquire should return an updated state. @@ -1517,6 +1521,75 @@ def test_multi_trust_region_box_acquire_with_state() -> None: npt.assert_allclose(subspace.eps, exp_eps) +# Define a test case with multiple local models and multiple regions +@pytest.mark.parametrize("num_local_models", [0, 1, 2]) +@pytest.mark.parametrize("num_regions", [2, 4]) +@pytest.mark.parametrize("num_query_points_per_region", [1, 2]) +def test_batch_trust_region_box_with_multiple_models_and_regions( + num_local_models: int, num_regions: int, num_query_points_per_region: int +): + search_space = Box([0.0, 0.0], [6.0, 6.0]) + base_shift = tf.constant([2.0, 2.0], dtype=tf.float64) # Common base shift for all regions. + eps = 0.9 + subspaces = [TestTrustRegionBox(base_shift+i, search_space, i, init_eps=eps) for i in range(num_regions)] + + # Define the models and acquisition functions for each region + noise_variance = tf.constant(1e-6, dtype=tf.float64) + kernel_variance = tf.constant(1e-3, dtype=tf.float64) + kernel_lengthscale = tf.constant(0.1, dtype=tf.float64) + if num_local_models == 0: + x_shift = tf.constant([0.0, 0.0], dtype=tf.float64) + init_datasets = {OBJECTIVE: Dataset( + x_shift, tf.constant([[0.0]], dtype=tf.float64) + )} + models = {OBJECTIVE: QuadraticMeanAndRBFKernelWithSamplers( + init_datasets[OBJECTIVE], x_shift=x_shift, kernel_amplitude=kernel_variance, noise_variance=noise_variance + )} + else: + # Dataset per model, one point at the center/minimum of the region. 
+ init_datasets = {} + models = {} + for i in range(num_local_models): + tag = OBJECTIVE + f"__{i}" + shift = base_shift + i + query_points = tf.stack([ + shift - [eps, eps], + shift - [eps, 0.0] + [0.0, eps], + shift, + shift + [eps, eps], + shift - [0.0, eps] + [eps, 0.0], + ]) + observations = tf.constant([[eps**2], [eps**2], [0.0], [eps**2], [eps**2]], dtype=tf.float64) + init_datasets[tag] = Dataset(query_points, observations) + models[tag] = QuadraticMeanAndRBFKernelWithSamplers( + init_datasets[tag], x_shift=base_shift+i, kernel_amplitude=kernel_variance, noise_variance=noise_variance + ) + + for model in models.values(): + model.kernel = ( + gpflow.kernels.RBF( + variance=kernel_variance, + lengthscales=kernel_lengthscale, + ) + ) # need a gpflow kernel object for random feature decompositions + + if num_local_models == 0: + # Global model; acquire in parallel. + num_query_points = num_regions * num_query_points_per_region + else: + # Local models; acquire sequentially. + num_query_points = num_query_points_per_region + base_rule = EfficientGlobalOptimization( # type: ignore[var-annotated] + #builder=ParallelContinuousThompsonSampling(), num_query_points=num_query_points + builder=MultipleOptimismNegativeLowerConfidenceBound(search_space), num_query_points=num_query_points + ) + + mtb = BatchTrustRegionBox(subspaces, base_rule) + _, points = mtb.acquire(search_space, models, init_datasets)(None) + + tf.debugging.assert_shapes([(points, [num_query_points_per_region, num_regions, 2])]) + + def test_multi_trust_region_box_state_deepcopy() -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) dataset = Dataset( diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 37a5c89252..41bc3d1bc6 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -990,7 +990,7 @@ def select_dataset(self, datasets: Optional[Mapping[Tag, Dataset]]) -> Optional[ Select a single dataset belonging to this region. This is an optional method that is only required if the region is used with single model acquisition functions. - :param datasets: The datasets for each tag. + :param datasets: The dataset for each tag. :return: The dataset belonging to this region. """ # By default return the OBJECTIVE dataset. @@ -1128,17 +1128,13 @@ def state_func( state_ = BatchTrustRegion.State(acquisition_space) - # If the base rule is a single model acquisition rule, but we have multiple models, - # run the base rule sequentially for each subspace. + # If the base rule is a single model acquisition rule, but we have (multiple) local + # models, run the base rule sequentially for each subspace. # Otherwise, run the base rule once with all models and datasets. if isinstance(self._rule, EfficientGlobalOptimization) and hasattr( self._rule._builder, "single_builder" ) and (len(models) > 1 or OBJECTIVE not in models): points = [] - #for tag, model in models.items(): - # global_tag, index_tag = tag.split("__") - # tags = [tag, global_tag] # Prefer local dataset if available. 
- # dataset = get_value_for_tag(datasets, tags) for subspace in subspaces: model = subspace.select_model(models) dataset = subspace.select_dataset(datasets) @@ -1153,7 +1149,7 @@ def state_func( ) points = tf.concat(points, axis=0) else: - points = self._rule.acquire(acquisition_space, models, datasets=datasets) + points = self._rule.acquire(acquisition_space, models, datasets) return state_, tf.reshape(points, [-1, len(subspaces), points.shape[-1]]) @@ -1307,18 +1303,16 @@ def select_model( # Select the model belonging to this box. Note there isn't necessarily a one-to-one # mapping between regions and models. if self._index is None: - tags = OBJECTIVE # If no index, then pick the global dataset. + tags = OBJECTIVE # If no index, then pick the global model. else: num_objective_models = len([tag for tag in models if tag.split("__")[0] == OBJECTIVE]) index = self._index % num_objective_models - tags = [f"{OBJECTIVE}__{index}", OBJECTIVE] # Prefer local dataset if available. + tags = [f"{OBJECTIVE}__{index}", OBJECTIVE] # Prefer local model if available. return get_value_for_tag(models, tags) def select_dataset(self, datasets: Optional[Mapping[Tag, Dataset]]) -> Optional[Dataset]: # Select the dataset belonging to this box. Note there isn't necessarily a one-to-one # mapping between regions and datasets. - # There are two possible ways local datasets can - # be stored; either with a specific tag, or as a specific slice of the global dataset. if self._index is None: tags = OBJECTIVE # If no index, then pick the global dataset. else: @@ -1327,15 +1321,6 @@ def select_dataset(self, datasets: Optional[Mapping[Tag, Dataset]]) -> Optional[ tags = [f"{OBJECTIVE}__{index}", OBJECTIVE] # Prefer local dataset if available. return get_value_for_tag(datasets, tags) - ## If dataset is greater than rank 2, dimension 1 is the batch dimension. Select the slice - ## corresponding to this box. - ## Note: it is possible that this is already a local dataset from the previous step. However, - ## to keep the code general, we make no distinction between local and global datasets here; - ## as this is not an expected use case. 
- #if self._index is not None and dataset is not None and tf.rank(dataset.observations) > 2: - # dataset = Dataset(dataset.query_points[:, self._index, ...], dataset.observations[:, self._index, ...]) - #return dataset - @check_shapes( "return[0]: [D]", "return[1]: []", From 4b3c2de6b9db655bf188bad7067e25228a7ff77f Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Fri, 29 Sep 2023 14:04:01 +0100 Subject: [PATCH 03/33] Update multi model/dataset test (WIP) --- tests/unit/acquisition/test_rule.py | 115 +++++++++++++++++++--------- trieste/acquisition/rule.py | 34 +++++--- trieste/bayesian_optimizer.py | 3 +- 3 files changed, 103 insertions(+), 49 deletions(-) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index c1b28b694c..77dea07f54 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -22,6 +22,7 @@ import numpy.testing as npt import pytest import tensorflow as tf +import tensorflow_probability as tfp from tests.util.misc import empty_dataset, quadratic, random_seed from tests.util.models.gpflow.models import ( @@ -1523,10 +1524,11 @@ def test_multi_trust_region_box_acquire_with_state() -> None: # Define a test case with multiple local models and multiple regions @pytest.mark.parametrize("num_local_models", [0, 1, 2]) +@pytest.mark.parametrize("use_global_dataset", [True, False]) @pytest.mark.parametrize("num_regions", [2, 4]) @pytest.mark.parametrize("num_query_points_per_region", [1, 2]) def test_batch_trust_region_box_with_multiple_models_and_regions( - num_local_models: int, num_regions: int, num_query_points_per_region: int + num_local_models: int, use_global_dataset: bool, num_regions: int, num_query_points_per_region: int ): search_space = Box([0.0, 0.0], [6.0, 6.0]) base_shift = tf.constant([2.0, 2.0], dtype=tf.float64) # Common base shift for all regions. @@ -1537,41 +1539,66 @@ def test_batch_trust_region_box_with_multiple_models_and_regions( noise_variance = tf.constant(1e-6, dtype=tf.float64) kernel_variance = tf.constant(1e-3, dtype=tf.float64) kernel_lengthscale = tf.constant(0.1, dtype=tf.float64) - if num_local_models == 0: - x_shift = tf.constant([0.0, 0.0], dtype=tf.float64) - init_datasets = {OBJECTIVE: Dataset( - x_shift, tf.constant([[0.0]], dtype=tf.float64) - )} - models = {OBJECTIVE: QuadraticMeanAndRBFKernelWithSamplers( - init_datasets[OBJECTIVE], x_shift=x_shift, kernel_amplitude=kernel_variance, noise_variance=noise_variance - )} - else: - # Dataset per model, one point at the center/minimum of the region. - init_datasets = {} - models = {} - for i in range(num_local_models): + #if num_local_models == 0: + # x_shift = tf.constant([0.0, 0.0], dtype=tf.float64) + # init_datasets = {OBJECTIVE: Dataset( + # x_shift, tf.constant([[0.0]], dtype=tf.float64) + # )} + # models = {OBJECTIVE: QuadraticMeanAndRBFKernelWithSamplers( + # init_datasets[OBJECTIVE], x_shift=x_shift, kernel_amplitude=kernel_variance, noise_variance=noise_variance + # )} + #else: + # Dataset per model, one point at the center/minimum of the region. 
+ global_dataset = Dataset( + tf.constant([[0.0, 0.0]], dtype=tf.float64), + tf.constant([[1.0]], dtype=tf.float64), + ) + init_datasets = {OBJECTIVE: global_dataset} + models = {} + r = range(1) if num_local_models == 0 else range(num_local_models) + for i in r: + if num_local_models == 0: + tag = OBJECTIVE + num_models = 1 + else: tag = OBJECTIVE + f"__{i}" - shift = base_shift + i - query_points = tf.stack([ - shift - [eps, eps], - shift - [eps, 0.0] + [0.0, eps], - shift, - shift + [eps, eps], - shift - [0.0, eps] + [eps, 0.0], - ]) - observations = tf.constant([[eps**2], [eps**2], [0.0], [eps**2], [eps**2]], dtype=tf.float64) + num_models = num_local_models + + num_regions_per_model = num_regions // num_models + query_points = tf.stack([base_shift+j + for j in range(i, num_regions, num_models)]) + observations = tf.constant([0.0] * num_regions_per_model, dtype=tf.float64)[:, None] + #shift = base_shift + i + #query_points = tf.stack([ + # shift - [eps, 0.0] + [0.0, eps], + # shift, + # shift + [eps, eps], + # shift - [0.0, eps] + [eps, 0.0], + #]) + #observations = tf.constant([[eps**2], [eps**2], [0.0], [eps**2], [eps**2]], dtype=tf.float64) + if not use_global_dataset: init_datasets[tag] = Dataset(query_points, observations) - models[tag] = QuadraticMeanAndRBFKernelWithSamplers( - init_datasets[tag], x_shift=base_shift+i, kernel_amplitude=kernel_variance, noise_variance=noise_variance - ) - - for model in models.values(): - model.kernel = ( - gpflow.kernels.RBF( - variance=kernel_variance, - lengthscales=kernel_lengthscale, - ) - ) # need a gpflow kernel object for random feature decompositions + #models[tag] = QuadraticMeanAndRBFKernelWithSamplers( + # init_datasets[tag], x_shift=base_shift+i, kernel_amplitude=kernel_variance, noise_variance=noise_variance + #) + #models[tag] = QuadraticMeanAndRBFKernel( + # x_shift=base_shift+i, kernel_amplitude=kernel_variance, noise_variance=noise_variance + #) + kernel = tfp.math.psd_kernels.ExponentiatedQuadratic(kernel_variance) + mean_function = lambda x, i=i: tf.reduce_prod(tf.stack([quadratic(x - tf.cast(base_shift+j, dtype=x.dtype)) + for j in range(i, num_regions, num_models)]), + axis=0) + #mean_function = lambda x, i=i: quadratic(x - tf.cast(base_shift+i, dtype=x.dtype)) + models[tag] = GaussianProcess([mean_function], [kernel], noise_variance) + models[tag]._exp_dataset = global_dataset if use_global_dataset else init_datasets[tag] + + #for model in models.values(): + # model.kernel = ( + # gpflow.kernels.RBF( + # variance=kernel_variance, + # lengthscales=kernel_lengthscale, + # ) + # ) # need a gpflow kernel object for random feature decompositions if num_local_models == 0: # Global model; acquire in parallel. @@ -1579,15 +1606,31 @@ def test_batch_trust_region_box_with_multiple_models_and_regions( else: # Local models; acquire sequentially. num_query_points = num_query_points_per_region + + class TestMultipleOptimismNegativeLowerConfidenceBound(MultipleOptimismNegativeLowerConfidenceBound): + # Override the prepare_acquisition_function method to check that the dataset is correct. 
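+        # (`_exp_dataset` is the dataset each model is expected to receive; it is
+        # attached to the models by the test setup above.)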
+ def prepare_acquisition_function( + self, + model: ProbabilisticModel, + dataset: Optional[Dataset] = None, + ) -> AcquisitionFunction: + assert dataset is model._exp_dataset + return super().prepare_acquisition_function(model, dataset) + base_rule = EfficientGlobalOptimization( # type: ignore[var-annotated] #builder=ParallelContinuousThompsonSampling(), num_query_points=num_query_points - builder=MultipleOptimismNegativeLowerConfidenceBound(search_space), num_query_points=num_query_points + builder=TestMultipleOptimismNegativeLowerConfidenceBound(search_space), num_query_points=num_query_points ) mtb = BatchTrustRegionBox(subspaces, base_rule) _, points = mtb.acquire(search_space, models, init_datasets)(None) - tf.debugging.assert_shapes([(points, [num_query_points_per_region, num_regions, 2])]) + npt.assert_array_equal(points.shape, [num_query_points_per_region, num_regions, 2]) + + # Each region should find the minimum of its local model, which will be the center of the region. + exp_points = tf.stack([base_shift+i for i in range(num_regions)]) + exp_points = tf.tile(exp_points[None, :, :], [num_query_points_per_region, 1, 1]) + npt.assert_allclose(points, exp_points) def test_multi_trust_region_box_state_deepcopy() -> None: diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 41bc3d1bc6..3e75e7a217 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -1084,12 +1084,23 @@ def acquire( assert self._init_subspaces is not None num_subspaces = len(self._tags) - num_objective_models = len([tag for tag in models if tag.split("__")[0] == OBJECTIVE]) + num_objective_models = len([tag for tag in models if "__" in tag and tag.split("__")[0] == OBJECTIVE]) + num_objective_models = max(num_objective_models, 1) assert num_subspaces % num_objective_models == 0, ( f"The number of subspaces {num_subspaces} should be a multiple of the number of " f"objective models {num_objective_models}" ) + # If the base rule is a single model acquisition rule, but we have (multiple) local + # models, run the (deepcopied) base rule sequentially for each subspace. + # Otherwise, run the base rule as is, once with all models and datasets. + # Note: this should only trigger on the first call to `acquire`, as after that `self._rule` + # will be a list of rules. + if isinstance(self._rule, EfficientGlobalOptimization) and hasattr( + self._rule._builder, "single_builder" + ) and (num_objective_models > 1 or OBJECTIVE not in models): + self._rule = [copy.deepcopy(self._rule) for _ in range(num_subspaces)] + def state_func( state: BatchTrustRegion.State | None, ) -> Tuple[BatchTrustRegion.State | None, TensorType]: @@ -1128,18 +1139,15 @@ def state_func( state_ = BatchTrustRegion.State(acquisition_space) - # If the base rule is a single model acquisition rule, but we have (multiple) local - # models, run the base rule sequentially for each subspace. - # Otherwise, run the base rule once with all models and datasets. - if isinstance(self._rule, EfficientGlobalOptimization) and hasattr( - self._rule._builder, "single_builder" - ) and (len(models) > 1 or OBJECTIVE not in models): + # If the base rule is a sequence, run it sequentially for each subspace. + # See earlier comments. 
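+            # (If it is, `acquire` above replaced self._rule with one deep-copied base
+            # rule per subspace.)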
+ if isinstance(self._rule, Sequence): points = [] - for subspace in subspaces: + for subspace, rule in zip(subspaces, self._rule): model = subspace.select_model(models) dataset = subspace.select_dataset(datasets) points.append( - self._rule.acquire( + rule.acquire( subspace, # Using default tag, as that is what single model acquisition builders # expect. @@ -1147,7 +1155,7 @@ def state_func( {OBJECTIVE: dataset}, ) ) - points = tf.concat(points, axis=0) + points = tf.stack(points, axis=1) else: points = self._rule.acquire(acquisition_space, models, datasets) @@ -1305,7 +1313,8 @@ def select_model( if self._index is None: tags = OBJECTIVE # If no index, then pick the global model. else: - num_objective_models = len([tag for tag in models if tag.split("__")[0] == OBJECTIVE]) + num_objective_models = len([tag for tag in models if "__" in tag and tag.split("__")[0] == OBJECTIVE]) + num_objective_models = max(num_objective_models, 1) index = self._index % num_objective_models tags = [f"{OBJECTIVE}__{index}", OBJECTIVE] # Prefer local model if available. return get_value_for_tag(models, tags) @@ -1316,7 +1325,8 @@ def select_dataset(self, datasets: Optional[Mapping[Tag, Dataset]]) -> Optional[ if self._index is None: tags = OBJECTIVE # If no index, then pick the global dataset. else: - num_objective_datasets = len([tag for tag in datasets if tag.split("__")[0] == OBJECTIVE]) + num_objective_datasets = len([tag for tag in datasets if "__" in tag and tag.split("__")[0] == OBJECTIVE]) + num_objective_datasets = max(num_objective_datasets, 1) index = self._index % num_objective_datasets tags = [f"{OBJECTIVE}__{index}", OBJECTIVE] # Prefer local dataset if available. return get_value_for_tag(datasets, tags) diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index 7c3a01afbe..6548d57b3b 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -757,8 +757,9 @@ def optimize( # If query_points are rank 3, then use a batched observer. 
if tf.rank(query_points) == 3: num_objective_models = len( - [tag for tag in models if tag.split("__")[0] == OBJECTIVE] + [tag for tag in models if "__" in tag and tag.split("__")[0] == OBJECTIVE] ) + num_objective_models = max(num_objective_models, 1) observer = mk_batch_observer(observer, num_objective_models, OBJECTIVE) observer_output = observer(query_points) From 28803aa04a0bc0a5574215571ec49f12cb0b50ef Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Tue, 3 Oct 2023 17:51:54 +0100 Subject: [PATCH 04/33] Add unit test for keep datasets in regions --- tests/unit/acquisition/test_rule.py | 38 ++++++++++++++++++++++++++++- trieste/acquisition/rule.py | 38 +++++++++++++++++++++++++++++ trieste/bayesian_optimizer.py | 12 +-------- 3 files changed, 76 insertions(+), 12 deletions(-) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index 77dea07f54..693a36cc53 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -24,7 +24,7 @@ import tensorflow as tf import tensorflow_probability as tfp -from tests.util.misc import empty_dataset, quadratic, random_seed +from tests.util.misc import empty_dataset, mk_dataset, quadratic, random_seed from tests.util.models.gpflow.models import ( GaussianProcess, QuadraticMeanAndRBFKernel, @@ -64,6 +64,7 @@ from trieste.data import Dataset from trieste.models import ProbabilisticModel from trieste.models.interfaces import TrainableSupportsGetKernel +from trieste.objectives.utils import mk_batch_observer from trieste.observer import OBJECTIVE from trieste.space import Box, SearchSpace, TaggedMultiSearchSpace from trieste.types import State, Tag, TensorType @@ -1633,6 +1634,41 @@ def prepare_acquisition_function( npt.assert_allclose(points, exp_points) +@pytest.mark.parametrize( + "datasets, exp_num_points", + [ + ({OBJECTIVE: mk_dataset([[0.0], [1.0], [2.0]], [[1.0], [1.0], [1.0]])}, 2), + ({OBJECTIVE: mk_dataset([[0.0], [1.0], [0.3], [2.0], [0.7], [1.7]], [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0]])}, 3), + ( + { + 'OBJECTIVE__0': mk_dataset([[0.0]], [[1.0]]), + 'OBJECTIVE__1': mk_dataset([[1.0]], [[1.0]]), + 'OBJECTIVE__2': mk_dataset([[2.0]], [[1.0]]), + }, + 2, + ), + ], +) +def test_multi_trust_region_box_updated_datasets_are_in_regions( + datasets: Mapping[Tag, Dataset], exp_num_points: int +): + search_space = Box([0.0], [3.0]) + # Non-overlapping regions. 
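+    # (Each region is centred at i, for i = 0, 1, 2, with half-width 0.4, so the
+    # regions are disjoint.)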
+ subspaces = [TestTrustRegionBox(tf.constant([i], dtype=tf.float64), search_space, i, init_eps=0.4) for i in range(3)] + models = {OBJECTIVE: QuadraticMeanAndRBFKernel()} + base_rule = EfficientGlobalOptimization( # type: ignore[var-annotated] + builder=MultipleOptimismNegativeLowerConfidenceBound(search_space), num_query_points=3 + ) + rule = BatchTrustRegionBox(subspaces, base_rule) + _, points = rule.acquire(search_space, models, datasets)(None) + observer = mk_batch_observer(quadratic, 3, OBJECTIVE) + new_data = observer(points) + datasets = rule.update_datasets(datasets, new_data) + for i, subspace in enumerate(subspaces): + assert datasets[f"OBJECTIVE__{i}"].query_points.shape[0] == exp_num_points + assert datasets[f"OBJECTIVE__{i}"].query_points in subspace + + def test_multi_trust_region_box_state_deepcopy() -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) dataset = Dataset( diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 3e75e7a217..507213cefa 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -148,6 +148,20 @@ def acquire_single( datasets=None if dataset is None else {OBJECTIVE: dataset}, ) + def update_datasets( + self, datasets: Mapping[Tag, Dataset], new_datasets: Mapping[Tag, Dataset] + ) -> Mapping[Tag, Dataset]: + """ + Update the datasets with new datasets. + + :param datasets: The current datasets. + :param new_datasets: The new datasets. + :return: The updated datasets. + """ + # By default, we just add the new datasets to the old ones. + datasets = {tag: datasets[tag] + new_datasets[tag] for tag in new_datasets} + return datasets + class EfficientGlobalOptimization( AcquisitionRule[TensorType, SearchSpaceType, ProbabilisticModelType] @@ -1208,6 +1222,30 @@ def get_initialize_subspaces_mask( """ ... + def update_datasets( + self, datasets: Mapping[Tag, Dataset], new_datasets: Mapping[Tag, Dataset] + ) -> Mapping[Tag, Dataset]: + # Account for the case where there may be an initial dataset that is not tagged + # per region. In this case, only the global dataset will exist in datasets. We + # want to copy this initial dataset to all the regions. + # + # If a tag from tagged_output does not exist in datasets, then add it to + # datasets by copying the dataset from datasets with the same tag-prefix. + # Otherwise keep the existing dataset from datasets. + datasets = { + tag: get_value_for_tag(datasets, [tag, tag.split("__")[0]]) + new_datasets[tag] + for tag in new_datasets + } + + #for tag in new_datasets: + # dataset = get_value_for_tag(datasets, [tag, tag.split("__")[0]]) + new_datasets[tag] + # in_tr = self.contains(dataset.query_points) + # in_qps = tf.boolean_mask(dataset.query_points, in_tr) + # in_obs = tf.boolean_mask(dataset.observations, in_tr) + # datasets[tag] = Dataset(in_qps, in_obs) + + return datasets + class SingleObjectiveTrustRegionBox(Box, UpdatableTrustRegion): """An updatable box search space for use with trust region acquisition rules.""" diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index 6548d57b3b..0abf5455eb 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -769,17 +769,7 @@ def optimize( else {OBJECTIVE: observer_output} ) - # Account for the case where there may be an initial dataset that is not tagged - # per region. In this case, only the global dataset will exist in datasets. We - # want to copy this initial dataset to all the regions. 
- # - # If a tag from tagged_output does not exist in datasets, then add it to - # datasets by copying the dataset from datasets with the same tag-prefix. - # Otherwise keep the existing dataset from datasets. - datasets = { - tag: get_value_for_tag(datasets, [tag, tag.split("__")[0]]) + tagged_output[tag] - for tag in tagged_output - } + datasets = acquisition_rule.update_datasets(datasets, tagged_output) with Timer() as model_fitting_timer: if fit_model: From e99df9680bac2a7787b1c383516e25a5342e23bc Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Fri, 6 Oct 2023 17:23:42 +0100 Subject: [PATCH 05/33] Add more tests and move to local tags class --- docs/notebooks/trust_region.pct.py | 6 +- tests/unit/acquisition/test_rule.py | 171 ++++++++++++++------------ tests/unit/objectives/test_utils.py | 47 ++++++- tests/unit/test_bayesian_optimizer.py | 75 +++++++++++ tests/unit/utils/test_misc.py | 14 ++- trieste/acquisition/rule.py | 151 ++++++++++++++--------- trieste/acquisition/utils.py | 21 +++- trieste/ask_tell_optimization.py | 67 +++++++--- trieste/bayesian_optimizer.py | 59 ++++++--- trieste/objectives/utils.py | 46 ++++--- trieste/utils/misc.py | 89 +++++++++++++- 11 files changed, 541 insertions(+), 205 deletions(-) diff --git a/docs/notebooks/trust_region.pct.py b/docs/notebooks/trust_region.pct.py index 12a6da455f..195168ddcd 100644 --- a/docs/notebooks/trust_region.pct.py +++ b/docs/notebooks/trust_region.pct.py @@ -211,7 +211,7 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: ] base_rule = trieste.acquisition.rule.EfficientGlobalOptimization( # type: ignore[var-annotated] builder=trieste.acquisition.ParallelContinuousThompsonSampling(), - #num_query_points=num_query_points, + # num_query_points=num_query_points, ) batch_acq_rule = trieste.acquisition.rule.BatchTrustRegionBox( init_subspaces, base_rule @@ -230,9 +230,9 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: num_steps = 5 result = bo.optimize( - #num_steps, initial_data, build_model(), batch_acq_rule, track_state=True + # num_steps, initial_data, build_model(), batch_acq_rule, track_state=True num_steps, - initial_data, + {trieste.observer.OBJECTIVE: initial_data}, trieste.acquisition.utils.copy_to_local_models(build_model(), 2), batch_acq_rule, track_state=True, diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index 693a36cc53..c03df12e31 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -61,6 +61,7 @@ ThompsonSampler, ThompsonSamplerFromTrajectory, ) +from trieste.acquisition.utils import copy_to_local_models from trieste.data import Dataset from trieste.models import ProbabilisticModel from trieste.models.interfaces import TrainableSupportsGetKernel @@ -68,6 +69,7 @@ from trieste.observer import OBJECTIVE from trieste.space import Box, SearchSpace, TaggedMultiSearchSpace from trieste.types import State, Tag, TensorType +from trieste.utils.misc import LocalTag def _line_search_maximize( @@ -1172,16 +1174,16 @@ def test_turbo_state_deepcopy() -> None: npt.assert_allclose(tr_state_copy.y_min, tr_state.y_min) -# get_local_min raises if dataset is None. -def test_trust_region_box_get_local_min_raises_if_dataset_is_none() -> None: +# get_dataset_min raises if dataset is None. 
+def test_trust_region_box_get_dataset_min_raises_if_dataset_is_none() -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) trb = SingleObjectiveTrustRegionBox(search_space) with pytest.raises(ValueError, match="dataset must be provided"): - trb.get_local_min(None) + trb.get_dataset_min(None) -# get_local_min picks the minimum x and y values from the dataset. -def test_trust_region_box_get_local_min() -> None: +# get_dataset_min picks the minimum x and y values from the dataset. +def test_trust_region_box_get_dataset_min() -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) dataset = Dataset( tf.constant([[0.1, 0.1], [0.5, 0.5], [0.3, 0.4], [0.8, 0.8], [0.4, 0.4]], dtype=tf.float64), @@ -1190,21 +1192,21 @@ def test_trust_region_box_get_local_min() -> None: trb = SingleObjectiveTrustRegionBox(search_space) trb._lower = tf.constant([0.2, 0.2], dtype=tf.float64) trb._upper = tf.constant([0.7, 0.7], dtype=tf.float64) - x_min, y_min = trb.get_local_min(dataset) + x_min, y_min = trb.get_dataset_min(dataset) npt.assert_array_equal(x_min, tf.constant([0.3, 0.4], dtype=tf.float64)) npt.assert_array_equal(y_min, tf.constant([0.2], dtype=tf.float64)) -# get_local_min returns first x value and inf y value when points in dataset are outside the +# get_dataset_min returns first x value and inf y value when points in dataset are outside the # search space. -def test_trust_region_box_get_local_min_outside_search_space() -> None: +def test_trust_region_box_get_dataset_min_outside_search_space() -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) dataset = Dataset( tf.constant([[1.2, 1.3], [-0.4, -0.5]], dtype=tf.float64), tf.constant([[0.7], [0.9]], dtype=tf.float64), ) trb = SingleObjectiveTrustRegionBox(search_space) - x_min, y_min = trb.get_local_min(dataset) + x_min, y_min = trb.get_dataset_min(dataset) npt.assert_array_equal(x_min, tf.constant([1.2, 1.3], dtype=tf.float64)) npt.assert_array_equal(y_min, tf.constant([np.inf], dtype=tf.float64)) @@ -1524,103 +1526,92 @@ def test_multi_trust_region_box_acquire_with_state() -> None: # Define a test case with multiple local models and multiple regions -@pytest.mark.parametrize("num_local_models", [0, 1, 2]) +@pytest.mark.parametrize("use_global_model", [True, False]) @pytest.mark.parametrize("use_global_dataset", [True, False]) @pytest.mark.parametrize("num_regions", [2, 4]) @pytest.mark.parametrize("num_query_points_per_region", [1, 2]) def test_batch_trust_region_box_with_multiple_models_and_regions( - num_local_models: int, use_global_dataset: bool, num_regions: int, num_query_points_per_region: int -): + use_global_model: bool, + use_global_dataset: bool, + num_regions: int, + num_query_points_per_region: int, +) -> None: search_space = Box([0.0, 0.0], [6.0, 6.0]) base_shift = tf.constant([2.0, 2.0], dtype=tf.float64) # Common base shift for all regions. 
eps = 0.9 - subspaces = [TestTrustRegionBox(base_shift+i, search_space, i, init_eps=eps) for i in range(num_regions)] + subspaces = [ + TestTrustRegionBox(base_shift + i, search_space, i, init_eps=eps) + for i in range(num_regions) + ] # Define the models and acquisition functions for each region noise_variance = tf.constant(1e-6, dtype=tf.float64) kernel_variance = tf.constant(1e-3, dtype=tf.float64) - kernel_lengthscale = tf.constant(0.1, dtype=tf.float64) - #if num_local_models == 0: - # x_shift = tf.constant([0.0, 0.0], dtype=tf.float64) - # init_datasets = {OBJECTIVE: Dataset( - # x_shift, tf.constant([[0.0]], dtype=tf.float64) - # )} - # models = {OBJECTIVE: QuadraticMeanAndRBFKernelWithSamplers( - # init_datasets[OBJECTIVE], x_shift=x_shift, kernel_amplitude=kernel_variance, noise_variance=noise_variance - # )} - #else: - # Dataset per model, one point at the center/minimum of the region. + global_dataset = Dataset( tf.constant([[0.0, 0.0]], dtype=tf.float64), tf.constant([[1.0]], dtype=tf.float64), ) init_datasets = {OBJECTIVE: global_dataset} models = {} - r = range(1) if num_local_models == 0 else range(num_local_models) + r = range(1) if use_global_model else range(num_regions) for i in r: - if num_local_models == 0: + if use_global_model: tag = OBJECTIVE num_models = 1 else: - tag = OBJECTIVE + f"__{i}" - num_models = num_local_models + tag = LocalTag(OBJECTIVE, i) + num_models = num_regions num_regions_per_model = num_regions // num_models - query_points = tf.stack([base_shift+j - for j in range(i, num_regions, num_models)]) + query_points = tf.stack([base_shift + j for j in range(i, num_regions, num_models)]) observations = tf.constant([0.0] * num_regions_per_model, dtype=tf.float64)[:, None] - #shift = base_shift + i - #query_points = tf.stack([ - # shift - [eps, 0.0] + [0.0, eps], - # shift, - # shift + [eps, eps], - # shift - [0.0, eps] + [eps, 0.0], - #]) - #observations = tf.constant([[eps**2], [eps**2], [0.0], [eps**2], [eps**2]], dtype=tf.float64) + if not use_global_dataset: init_datasets[tag] = Dataset(query_points, observations) - #models[tag] = QuadraticMeanAndRBFKernelWithSamplers( - # init_datasets[tag], x_shift=base_shift+i, kernel_amplitude=kernel_variance, noise_variance=noise_variance - #) - #models[tag] = QuadraticMeanAndRBFKernel( - # x_shift=base_shift+i, kernel_amplitude=kernel_variance, noise_variance=noise_variance - #) + kernel = tfp.math.psd_kernels.ExponentiatedQuadratic(kernel_variance) - mean_function = lambda x, i=i: tf.reduce_prod(tf.stack([quadratic(x - tf.cast(base_shift+j, dtype=x.dtype)) - for j in range(i, num_regions, num_models)]), - axis=0) - #mean_function = lambda x, i=i: quadratic(x - tf.cast(base_shift+i, dtype=x.dtype)) - models[tag] = GaussianProcess([mean_function], [kernel], noise_variance) - models[tag]._exp_dataset = global_dataset if use_global_dataset else init_datasets[tag] - #for model in models.values(): - # model.kernel = ( - # gpflow.kernels.RBF( - # variance=kernel_variance, - # lengthscales=kernel_lengthscale, - # ) - # ) # need a gpflow kernel object for random feature decompositions + def mean_function(x: TensorType, i: int = i) -> TensorType: + return tf.reduce_prod( + tf.stack( + [ + quadratic(x - tf.cast(base_shift + j, dtype=x.dtype)) + for j in range(i, num_regions, num_models) + ] + ), + axis=0, + ) + + # mean_function = lambda x, i=i: quadratic(x - tf.cast(base_shift+i, dtype=x.dtype)) + models[tag] = GaussianProcess([mean_function], [kernel], noise_variance) + models[tag]._exp_dataset = ( # type: 
ignore[attr-defined] + global_dataset if use_global_dataset else init_datasets[tag] + ) - if num_local_models == 0: + if use_global_model: # Global model; acquire in parallel. num_query_points = num_regions * num_query_points_per_region else: # Local models; acquire sequentially. num_query_points = num_query_points_per_region - class TestMultipleOptimismNegativeLowerConfidenceBound(MultipleOptimismNegativeLowerConfidenceBound): + class TestMultipleOptimismNegativeLowerConfidenceBound( + MultipleOptimismNegativeLowerConfidenceBound + ): # Override the prepare_acquisition_function method to check that the dataset is correct. def prepare_acquisition_function( self, model: ProbabilisticModel, dataset: Optional[Dataset] = None, ) -> AcquisitionFunction: - assert dataset is model._exp_dataset + assert dataset is model._exp_dataset # type: ignore[attr-defined] return super().prepare_acquisition_function(model, dataset) base_rule = EfficientGlobalOptimization( # type: ignore[var-annotated] - #builder=ParallelContinuousThompsonSampling(), num_query_points=num_query_points - builder=TestMultipleOptimismNegativeLowerConfidenceBound(search_space), num_query_points=num_query_points + # builder=ParallelContinuousThompsonSampling(), num_query_points=num_query_points + builder=TestMultipleOptimismNegativeLowerConfidenceBound(search_space), + num_query_points=num_query_points, ) mtb = BatchTrustRegionBox(subspaces, base_rule) @@ -1628,45 +1619,64 @@ def prepare_acquisition_function( npt.assert_array_equal(points.shape, [num_query_points_per_region, num_regions, 2]) - # Each region should find the minimum of its local model, which will be the center of the region. - exp_points = tf.stack([base_shift+i for i in range(num_regions)]) + # Each region should find the minimum of its local model, which will be the center of the + # region. + exp_points = tf.stack([base_shift + i for i in range(num_regions)]) exp_points = tf.tile(exp_points[None, :, :], [num_query_points_per_region, 1, 1]) npt.assert_allclose(points, exp_points) @pytest.mark.parametrize( - "datasets, exp_num_points", + "datasets, exp_num_init_points", [ - ({OBJECTIVE: mk_dataset([[0.0], [1.0], [2.0]], [[1.0], [1.0], [1.0]])}, 2), - ({OBJECTIVE: mk_dataset([[0.0], [1.0], [0.3], [2.0], [0.7], [1.7]], [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0]])}, 3), + ({OBJECTIVE: mk_dataset([[0.0], [1.0], [2.0]], [[1.0], [1.0], [1.0]])}, 1), ( { - 'OBJECTIVE__0': mk_dataset([[0.0]], [[1.0]]), - 'OBJECTIVE__1': mk_dataset([[1.0]], [[1.0]]), - 'OBJECTIVE__2': mk_dataset([[2.0]], [[1.0]]), + OBJECTIVE: mk_dataset( + [[0.0], [1.0], [0.3], [2.0], [0.7], [1.7]], + [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0]], + ) }, 2, ), + ( + { + "OBJECTIVE__0": mk_dataset([[0.0]], [[1.0]]), + "OBJECTIVE__1": mk_dataset([[1.0]], [[1.0]]), + "OBJECTIVE__2": mk_dataset([[2.0]], [[1.0]]), + }, + 1, + ), ], ) +@pytest.mark.parametrize("num_query_points_per_region", [1, 2]) def test_multi_trust_region_box_updated_datasets_are_in_regions( - datasets: Mapping[Tag, Dataset], exp_num_points: int -): + datasets: Mapping[Tag, Dataset], exp_num_init_points: int, num_query_points_per_region: int +) -> None: + num_local_models = 3 search_space = Box([0.0], [3.0]) # Non-overlapping regions. 
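    # With centres at 0, 1, 2 and init_eps=0.4, region i covers roughly [i - 0.4, i + 0.4],
    # so each updated point can be attributed to a unique region.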
- subspaces = [TestTrustRegionBox(tf.constant([i], dtype=tf.float64), search_space, i, init_eps=0.4) for i in range(3)] - models = {OBJECTIVE: QuadraticMeanAndRBFKernel()} + subspaces = [ + TestTrustRegionBox(tf.constant([i], dtype=tf.float64), search_space, i, init_eps=0.4) + for i in range(3) + ] + models = copy_to_local_models(QuadraticMeanAndRBFKernel(), num_local_models) base_rule = EfficientGlobalOptimization( # type: ignore[var-annotated] - builder=MultipleOptimismNegativeLowerConfidenceBound(search_space), num_query_points=3 + builder=MultipleOptimismNegativeLowerConfidenceBound(search_space), + num_query_points=num_query_points_per_region, ) rule = BatchTrustRegionBox(subspaces, base_rule) _, points = rule.acquire(search_space, models, datasets)(None) - observer = mk_batch_observer(quadratic, 3, OBJECTIVE) + observer = mk_batch_observer(quadratic, OBJECTIVE) new_data = observer(points) + assert not isinstance(new_data, Dataset) datasets = rule.update_datasets(datasets, new_data) for i, subspace in enumerate(subspaces): - assert datasets[f"OBJECTIVE__{i}"].query_points.shape[0] == exp_num_points - assert datasets[f"OBJECTIVE__{i}"].query_points in subspace + assert ( + datasets[f"OBJECTIVE__{i}"].query_points.shape[0] + == exp_num_init_points + num_query_points_per_region + ) + assert np.all(subspace.contains(datasets[f"OBJECTIVE__{i}"].query_points)) def test_multi_trust_region_box_state_deepcopy() -> None: @@ -1675,7 +1685,10 @@ def test_multi_trust_region_box_state_deepcopy() -> None: tf.constant([[0.25, 0.25], [0.5, 0.5], [0.75, 0.75]], dtype=tf.float64), tf.constant([[1.0], [1.0], [1.0]], dtype=tf.float64), ) - subspaces = [SingleObjectiveTrustRegionBox(search_space, 0.07, 1e-5, 1e-3) for _ in range(3)] + subspaces = [ + SingleObjectiveTrustRegionBox(search_space, beta=0.07, kappa=1e-5, min_eps=1e-3) + for _ in range(3) + ] for _subspace in subspaces: _subspace.initialize(datasets={OBJECTIVE: dataset}) state = BatchTrustRegionBox.State(acquisition_space=TaggedMultiSearchSpace(subspaces)) diff --git a/tests/unit/objectives/test_utils.py b/tests/unit/objectives/test_utils.py index 52cbbdb827..1c10301cac 100644 --- a/tests/unit/objectives/test_utils.py +++ b/tests/unit/objectives/test_utils.py @@ -11,10 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Callable, Union + import numpy.testing as npt +import pytest import tensorflow as tf -from trieste.objectives.utils import mk_multi_observer, mk_observer +from trieste.data import Dataset +from trieste.objectives.utils import mk_batch_observer, mk_multi_observer, mk_observer +from trieste.observer import SingleObserver +from trieste.types import Tag, TensorType def test_mk_observer() -> None: @@ -49,3 +55,42 @@ def test_mk_multi_observer() -> None: npt.assert_array_equal(ys["foo"].observations, x_ + 1) npt.assert_array_equal(ys["bar"].query_points, x_) npt.assert_array_equal(ys["bar"].observations, x_ - 1) + + +def test_mk_batch_observer_raises_on_multi_observer() -> None: + observer = mk_batch_observer(mk_multi_observer(foo=lambda x: x + 1, bar=lambda x: x - 1)) + with pytest.raises(ValueError, match="mk_batch_observer does not support multi-observers"): + observer(tf.constant([[[3.0]]])) + + +@pytest.mark.parametrize("input_objective", [lambda x: x, lambda x: Dataset(x, x)]) +@pytest.mark.parametrize("batch_size", [1, 2, 3]) +@pytest.mark.parametrize("num_query_points_per_batch", [1, 2]) +@pytest.mark.parametrize("key", [None, "bar"]) +def test_mk_batch_observer( + input_objective: Union[Callable[[TensorType], TensorType], SingleObserver], + batch_size: int, + num_query_points_per_batch: int, + key: Tag, +) -> None: + x_ = tf.reshape( + tf.constant(range(batch_size * num_query_points_per_batch), tf.float64), + (num_query_points_per_batch, batch_size, 1), + ) + ys = mk_batch_observer(input_objective, key)(x_) + + if key is None: + assert isinstance(ys, Dataset) + npt.assert_array_equal(ys.query_points, tf.reshape(x_, [-1, 1])) + npt.assert_array_equal(ys.observations, tf.reshape(x_, [-1, 1])) + else: + assert isinstance(ys, dict) + if batch_size == 1: + assert ys.keys() == {key} + npt.assert_array_equal(ys[key].query_points, x_[:, 0]) + npt.assert_array_equal(ys[key].observations, x_[:, 0]) + else: + assert ys.keys() == {f"{key}__{i}" for i in range(batch_size)} + for i in range(batch_size): + npt.assert_array_equal(ys[f"{key}__{i}"].query_points, x_[:, i]) + npt.assert_array_equal(ys[f"{key}__{i}"].observations, x_[:, i]) diff --git a/tests/unit/test_bayesian_optimizer.py b/tests/unit/test_bayesian_optimizer.py index 9e15032d6b..90135d90f7 100644 --- a/tests/unit/test_bayesian_optimizer.py +++ b/tests/unit/test_bayesian_optimizer.py @@ -38,6 +38,7 @@ rbf, ) from trieste.acquisition.rule import AcquisitionRule +from trieste.acquisition.utils import copy_to_local_models from trieste.bayesian_optimizer import BayesianOptimizer, FrozenRecord, OptimizationResult, Record from trieste.data import Dataset from trieste.models import ProbabilisticModel, TrainableProbabilisticModel @@ -45,6 +46,7 @@ from trieste.space import Box, SearchSpace from trieste.types import State, Tag, TensorType from trieste.utils import Err, Ok +from trieste.utils.misc import LocalTag # tags FOO: Tag = "foo" @@ -236,6 +238,79 @@ def __call__(self, x: tf.Tensor) -> Dataset: assert observer.call_count == steps +@pytest.mark.parametrize("use_global_model", [True, False]) +@pytest.mark.parametrize("use_global_init_dataset", [True, False]) +@pytest.mark.parametrize("num_query_points_per_batch", [1, 2]) +def test_bayesian_optimizer_creates_correct_datasets_for_rank3_points( + use_global_model: bool, use_global_init_dataset: bool, num_query_points_per_batch: int +) -> None: + batch_size = 4 + if use_global_init_dataset: + init_data = {OBJECTIVE: mk_dataset([[0.5], [1.5]], [[0.25], [0.35]])} + else: + 
init_data = { + LocalTag(OBJECTIVE, i): mk_dataset([[0.5 + i], [1.5 + i]], [[0.25], [0.35]]) + for i in range(batch_size) + } + + query_points = tf.reshape( + tf.constant(range(batch_size * num_query_points_per_batch), tf.float64), + (num_query_points_per_batch, batch_size, 1), + ) + + class DatasetChecker(_PseudoTrainableQuadratic): + def __init__(self) -> None: + super().__init__() + self.update_count = 0 + self._tag = OBJECTIVE + + def update(self, dataset: Dataset) -> None: + if use_global_model: + if use_global_init_dataset: + exp_init_qps = init_data[OBJECTIVE].query_points + else: + exp_init_qps = tf.stack([data.query_points for data in init_data.values()], 1) + exp_init_qps = tf.reshape(exp_init_qps, [-1, 1]) + else: + if use_global_init_dataset: + exp_init_qps = init_data[OBJECTIVE].query_points + else: + exp_init_qps = init_data[self._tag].query_points + + if self.update_count == 0: + # Initial model training. + exp_qps = exp_init_qps + else: + # Subsequent model training. + if use_global_model: + if use_global_init_dataset: + _exp_init_qps = tf.tile(exp_init_qps[:, None], [1, batch_size, 1]) + else: + _exp_init_qps = tf.reshape(exp_init_qps, (-1, batch_size, 1)) + exp_qps = tf.concat([_exp_init_qps, query_points], 0) + exp_qps = tf.reshape(exp_qps, [-1, 1]) + else: + index = LocalTag.from_tag(self._tag).local_index + exp_qps = tf.concat([exp_init_qps, query_points[:, index]], 0) + + npt.assert_array_equal(exp_qps, dataset.query_points) + self.update_count += 1 + + search_space = Box([-1], [1]) + + model = DatasetChecker() + if use_global_model: + models = {OBJECTIVE: model} + else: + models = copy_to_local_models(model, batch_size) # type: ignore[assignment] + for tag, model in models.items(): + model._tag = tag + + optimizer = BayesianOptimizer(lambda x: Dataset(x, x), search_space) + rule = FixedAcquisitionRule(query_points) + optimizer.optimize(1, init_data, models, rule).final_result.unwrap() + + @pytest.mark.parametrize("mode", ["early", "fail", "full"]) def test_bayesian_optimizer_continue_optimization(mode: str) -> None: class _CountingObserver: diff --git a/tests/unit/utils/test_misc.py b/tests/unit/utils/test_misc.py index ac6dd642eb..aa7d581dd6 100644 --- a/tests/unit/utils/test_misc.py +++ b/tests/unit/utils/test_misc.py @@ -97,20 +97,26 @@ def test_err() -> None: def test_get_value_for_tag_returns_none_if_mapping_is_none() -> None: - assert get_value_for_tag(None) is None + assert get_value_for_tag(None) == (None, None) def test_get_value_for_tag_raises_if_tag_not_in_mapping() -> None: - with pytest.raises(ValueError, match="tag 'baz' not found in mapping"): + with pytest.raises(ValueError, match="none of the tags '\['baz'\]' found in mapping"): get_value_for_tag({"foo": "bar"}, "baz") def test_get_value_for_tag_returns_value_for_default_tag() -> None: - assert get_value_for_tag({"foo": "bar", OBJECTIVE: "baz"}) == "baz" + assert get_value_for_tag({"foo": "bar", OBJECTIVE: "baz"}) == (OBJECTIVE, "baz") def test_get_value_for_tag_returns_value_for_specified_tag() -> None: - assert get_value_for_tag({"foo": "bar", OBJECTIVE: "baz"}, "foo") == "bar" + assert get_value_for_tag({"foo": "bar", OBJECTIVE: "baz"}, "foo") == ("foo", "bar") + + +def test_get_value_for_tag_returns_first_matching_tag() -> None: + assert get_value_for_tag( + {"foo": "bar", OBJECTIVE: "baz", "qux": "quux", "bar": "baz"}, ["far", "qux", "foo"] + ) == ("qux", "quux") def test_Timer() -> None: diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 507213cefa..df6e504398 100644 
--- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -50,7 +50,7 @@ from ..observer import OBJECTIVE from ..space import Box, SearchSpace, TaggedMultiSearchSpace from ..types import State, Tag, TensorType -from ..utils.misc import get_value_for_tag +from ..utils.misc import LocalTag, get_value_for_tag from .function import ( BatchMonteCarloExpectedImprovement, ExpectedImprovement, @@ -158,8 +158,24 @@ def update_datasets( :param new_datasets: The new datasets. :return: The updated datasets. """ - # By default, we just add the new datasets to the old ones. - datasets = {tag: datasets[tag] + new_datasets[tag] for tag in new_datasets} + # Account for the case where there may be an initial dataset that is not tagged + # per region. In this case, only the global dataset will exist in datasets. We + # want to copy this initial dataset to all the regions. + # + # If a tag from tagged_output does not exist in datasets, then add it to + # datasets by copying the dataset from datasets with the same tag-prefix. + # Otherwise keep the existing dataset from datasets. + # TODO: this could mean that when we have a global model, the global dataset + # can contain multiple copies of the initial dataset. + # datasets = { + # tag: get_value_for_tag(datasets, [tag, tag.split("__")[0]])[1] + new_datasets[tag] + # for tag in new_datasets + # } + datasets = {} + for tag in new_datasets: + _, dataset = get_value_for_tag(datasets, [tag, LocalTag.from_tag(tag).global_tag]) + assert dataset is not None + datasets[tag] = dataset + new_datasets[tag] return datasets @@ -988,7 +1004,7 @@ def update( def select_model( self, models: Optional[Mapping[Tag, ProbabilisticModelType]] - ) -> Optional[ProbabilisticModelType]: + ) -> Tuple[Optional[Tag], Optional[ProbabilisticModelType]]: """ Select a single model belonging to this region. This is an optional method that is only required if the region is used with single model acquisition functions. @@ -999,7 +1015,9 @@ def select_model( # By default return the OBJECTIVE model. return get_value_for_tag(models, OBJECTIVE) - def select_dataset(self, datasets: Optional[Mapping[Tag, Dataset]]) -> Optional[Dataset]: + def select_dataset( + self, datasets: Optional[Mapping[Tag, Dataset]] + ) -> Tuple[Optional[Tag], Optional[Dataset]]: """ Select a single dataset belonging to this region. This is an optional method that is only required if the region is used with single model acquisition functions. 
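With this change, `select_model` and `select_dataset` return a `(tag, value)` pair rather than the bare value, mirroring the updated `get_value_for_tag` convention. A minimal sketch of that lookup, with an illustrative mapping (the string values stand in for real models or datasets):

    from trieste.utils.misc import get_value_for_tag

    mapping = {"OBJECTIVE": "global", "OBJECTIVE__0": "local"}

    # Tags are searched in order, so a region-local entry wins when present.
    tag, value = get_value_for_tag(mapping, ["OBJECTIVE__0", "OBJECTIVE"])
    assert (tag, value) == ("OBJECTIVE__0", "local")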
@@ -1065,6 +1083,9 @@ def __init__(
         self._tags = tuple([str(index) for index in range(len(init_subspaces))])
 
         self._rule = rule
+        self._rules: Optional[
+            Sequence[AcquisitionRule[TensorType, SearchSpace, ProbabilisticModelType]]
+        ] = None
 
     def __repr__(self) -> str:
         """"""
@@ -1098,11 +1119,17 @@ def acquire(
         assert self._init_subspaces is not None
 
         num_subspaces = len(self._tags)
-        num_objective_models = len([tag for tag in models if "__" in tag and tag.split("__")[0] == OBJECTIVE])
-        num_objective_models = max(num_objective_models, 1)
-        assert num_subspaces % num_objective_models == 0, (
+        self.num_local_models = len(
+            [
+                tag
+                for tag in models
+                if (ltag := LocalTag.from_tag(tag)).is_local and ltag.global_tag == OBJECTIVE
+            ]
+        )
+        self.num_local_models = max(self.num_local_models, 1)
+        assert num_subspaces % self.num_local_models == 0, (
             f"The number of subspaces {num_subspaces} should be a multiple of the number of "
-            f"objective models {num_objective_models}"
+            f"local objective models {self.num_local_models}"
         )
 
         # If the base rule is a single model acquisition rule, but we have (multiple) local
         # models, run the base rule sequentially for each subspace.
         # Otherwise, run the base rule as is, once with all models and datasets.
-        # Note: this should only trigger on the first call to `acquire`, as after that `self._rule`
-        # will be a list of rules.
-        if isinstance(self._rule, EfficientGlobalOptimization) and hasattr(
-            self._rule._builder, "single_builder"
-        ) and (num_objective_models > 1 or OBJECTIVE not in models):
-            self._rule = [copy.deepcopy(self._rule) for _ in range(num_subspaces)]
+        # Note: this should only trigger on the first call to `acquire`, as after that
+        # `self._rules` will have been created.
+        if (
+            isinstance(self._rule, EfficientGlobalOptimization)
+            and hasattr(self._rule._builder, "single_builder")
+            and (self.num_local_models > 1 or OBJECTIVE not in models)
+        ):
+            self._rules = [copy.deepcopy(self._rule) for _ in range(num_subspaces)]
 
         def state_func(
             state: BatchTrustRegion.State | None,
@@ -1155,21 +1184,20 @@ def state_func(
 
-            # If the base rule is a sequence, run it sequentially for each subspace.
-            # See earlier comments.
-            if isinstance(self._rule, Sequence):
-                points = []
-                for subspace, rule in zip(subspaces, self._rule):
-                    model = subspace.select_model(models)
-                    dataset = subspace.select_dataset(datasets)
-                    points.append(
-                        rule.acquire(
-                            subspace,
-                            # Using default tag, as that is what single model acquisition builders
-                            # expect.
-                            {OBJECTIVE: model},
-                            {OBJECTIVE: dataset},
-                        )
-                    )
-                points = tf.stack(points, axis=1)
+            # If per-subspace rules were created, run them sequentially, one per subspace.
+            # See earlier comments.
+            if self._rules is not None:
+                _points = []
+                for subspace, rule in zip(subspaces, self._rules):
+                    _, _model = subspace.select_model(models)
+                    _, _dataset = subspace.select_dataset(datasets)
+                    assert _model is not None
+                    # Using default tag, as that is what single model acquisition builders expect.
+                    model = {OBJECTIVE: _model}
+                    if _dataset is None:
+                        dataset = None
+                    else:
+                        dataset = {OBJECTIVE: _dataset}
+                    _points.append(rule.acquire(subspace, model, dataset))
+                points = tf.stack(_points, axis=1)
             else:
                 points = self._rule.acquire(acquisition_space, models, datasets)
 
@@ -1225,26 +1253,30 @@ def get_initialize_subspaces_mask(
     def update_datasets(
         self, datasets: Mapping[Tag, Dataset], new_datasets: Mapping[Tag, Dataset]
     ) -> Mapping[Tag, Dataset]:
-        # Account for the case where there may be an initial dataset that is not tagged
-        # per region. In this case, only the global dataset will exist in datasets. We
-        # want to copy this initial dataset to all the regions.
-        #
-        # If a tag from tagged_output does not exist in datasets, then add it to
-        # datasets by copying the dataset from datasets with the same tag-prefix.
-        # Otherwise keep the existing dataset from datasets.
-        datasets = {
-            tag: get_value_for_tag(datasets, [tag, tag.split("__")[0]]) + new_datasets[tag]
-            for tag in new_datasets
-        }
+        datasets = super().update_datasets(datasets, new_datasets)
 
-        #for tag in new_datasets:
-        #    dataset = get_value_for_tag(datasets, [tag, tag.split("__")[0]]) + new_datasets[tag]
-        #    in_tr = self.contains(dataset.query_points)
-        #    in_qps = tf.boolean_mask(dataset.query_points, in_tr)
-        #    in_obs = tf.boolean_mask(dataset.observations, in_tr)
-        #    datasets[tag] = Dataset(in_qps, in_obs)
+        used_masks = {
+            tag: tf.zeros(dataset.query_points.shape[:-1], dtype=tf.bool)
+            for tag, dataset in datasets.items()
+        }
+        # TODO: using init_subspaces here is a bit of a hack, but it works for now.
+        assert self._init_subspaces is not None
+        for subspace in self._init_subspaces:
+            tag, space_dataset = subspace.select_dataset(datasets)
+            assert space_dataset is not None
+            in_region = subspace.contains(space_dataset.query_points)
+            # Mark the points of this region's dataset that fall inside the region. A
+            # dataset may be shared by several regions, so accumulate the mask with a
+            # logical OR, i.e. used_masks[tag] |= in_region.
+            used_masks[tag] = tf.logical_or(used_masks[tag], in_region)
+
+        filtered_datasets = {}
+        for tag, used_mask in used_masks.items():
+            filtered_datasets[tag] = Dataset(
+                tf.boolean_mask(datasets[tag].query_points, used_mask),
+                tf.boolean_mask(datasets[tag].observations, used_mask),
+            )
 
-        return datasets
+        return filtered_datasets
 
 
 class SingleObjectiveTrustRegionBox(Box, UpdatableTrustRegion):
@@ -1304,7 +1336,7 @@ def initialize(
         Initialize the box by sampling a location from the global search space and setting the
         bounds.
         """
-        dataset = self.select_dataset(datasets)
+        _, dataset = self.select_dataset(datasets)
 
         self.location = tf.squeeze(self.global_search_space.sample(1), axis=0)
         self._step_is_success = False
@@ -1328,7 +1360,7 @@ def update(
         ``1 / beta``. Conversely, if it was unsuccessful, the size is reduced by the factor
         ``beta``.
         """
-        dataset = self.select_dataset(datasets)
+        _, dataset = self.select_dataset(datasets)
 
         if tf.reduce_any(self.eps < self._min_eps):
             self.initialize(models, datasets)
@@ -1345,28 +1377,27 @@ def update(
 
     def select_model(
         self, models: Optional[Mapping[Tag, ProbabilisticModelType]]
-    ) -> Optional[ProbabilisticModelType]:
+    ) -> Tuple[Optional[Tag], Optional[ProbabilisticModelType]]:
         # Select the model belonging to this box. Note there isn't necessarily a one-to-one
         # mapping between regions and models.
         if self._index is None:
-            tags = OBJECTIVE  # If no index, then pick the global model.
+            tags = [OBJECTIVE]  # If no index, then pick the global model.
         else:
-            num_objective_models = len([tag for tag in models if "__" in tag and tag.split("__")[0] == OBJECTIVE])
-            num_objective_models = max(num_objective_models, 1)
-            index = self._index % num_objective_models
-            tags = [f"{OBJECTIVE}__{index}", OBJECTIVE]  # Prefer local model if available.
+            tags = [LocalTag(OBJECTIVE, self._index), OBJECTIVE]  # Prefer local model if available.
         return get_value_for_tag(models, tags)
 
-    def select_dataset(self, datasets: Optional[Mapping[Tag, Dataset]]) -> Optional[Dataset]:
+    def select_dataset(
+        self, datasets: Optional[Mapping[Tag, Dataset]]
+    ) -> Tuple[Optional[Tag], Optional[Dataset]]:
         # Select the dataset belonging to this box. Note there isn't necessarily a one-to-one
         # mapping between regions and datasets.
if self._index is None: - tags = OBJECTIVE # If no index, then pick the global dataset. + tags = [OBJECTIVE] # If no index, then pick the global dataset. else: - num_objective_datasets = len([tag for tag in datasets if "__" in tag and tag.split("__")[0] == OBJECTIVE]) - num_objective_datasets = max(num_objective_datasets, 1) - index = self._index % num_objective_datasets - tags = [f"{OBJECTIVE}__{index}", OBJECTIVE] # Prefer local dataset if available. + tags = [ + LocalTag(OBJECTIVE, self._index), + OBJECTIVE, + ] # Prefer local dataset if available. return get_value_for_tag(datasets, tags) @check_shapes( diff --git a/trieste/acquisition/utils.py b/trieste/acquisition/utils.py index 602afebc77..fbe1ee7af4 100644 --- a/trieste/acquisition/utils.py +++ b/trieste/acquisition/utils.py @@ -13,7 +13,7 @@ # limitations under the License. import copy import functools -from typing import Mapping, Tuple, Union +from typing import Mapping, Sequence, Tuple, Union import tensorflow as tf from check_shapes import check_shapes @@ -157,6 +157,25 @@ def copy_to_local_models( return {f"{key}__{i}": copy.deepcopy(global_model) for i in range(num_local_models)} +def stack_datasets(datasets: Sequence[Dataset]) -> Dataset: + """ + Stack a sequence of datasets along a new second batch axis. + + :param datasets: A sequence of datasets. + :return: A dataset whose query points and observations are the stack of the query points + and observations in ``datasets`` along the second axis. + :raise ValueError: If ``datasets`` is empty. + :raise InvalidArgumentError: If the shapes of the query points in ``datasets`` differ in any + but the first dimension. The same applies for observations. + """ + if not datasets: + raise ValueError("datasets must be non-empty") + + qps = tf.stack([dataset.query_points for dataset in datasets], axis=1) + obs = tf.stack([dataset.observations for dataset in datasets], axis=1) + return Dataset(tf.reshape(qps, [-1, qps.shape[-1]]), tf.reshape(obs, [-1, obs.shape[-1]])) + + @check_shapes( "points: [n_points, ...]", "return: [n_points]", diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py index f3528ed943..c0f7e627a5 100644 --- a/trieste/ask_tell_optimization.py +++ b/trieste/ask_tell_optimization.py @@ -32,6 +32,7 @@ from . import logging from .acquisition.rule import TURBO, AcquisitionRule, EfficientGlobalOptimization +from .acquisition.utils import stack_datasets from .bayesian_optimizer import ( FrozenRecord, OptimizationResult, @@ -43,10 +44,12 @@ ) from .data import Dataset from .models import TrainableProbabilisticModel +from .objectives.utils import mk_batch_observer from .observer import OBJECTIVE from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Ok, Timer +from .utils.misc import LocalTag, get_value_for_tag, get_values_for_tag_prefix StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -188,16 +191,19 @@ def __init__( if isinstance(datasets, Dataset): datasets = {OBJECTIVE: datasets} - models = {OBJECTIVE: models} # type: ignore[dict-item] + if not isinstance(models, Mapping): + models = {OBJECTIVE: models} # reassure the type checker that everything is tagged - datasets = cast(Dict[Tag, Dataset], datasets) models = cast(Dict[Tag, TrainableProbabilisticModelType], models) - if datasets.keys() != models.keys(): + # Get set of dataset and model keys, ignoring any local tag index. 
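+        # For example, both "OBJECTIVE" and "OBJECTIVE__3" reduce to the key "OBJECTIVE".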
+ datasets_keys = {LocalTag.from_tag(tag).global_tag for tag in datasets.keys()} + models_keys = {LocalTag.from_tag(tag).global_tag for tag in models.keys()} + if datasets_keys != models_keys: raise ValueError( - f"datasets and models should contain the same keys. Got {datasets.keys()} and" - f" {models.keys()} respectively." + f"datasets and models should contain the same keys. Got {datasets_keys} and" + f" {models_keys} respectively." ) self._datasets = datasets @@ -231,7 +237,19 @@ def __init__( if fit_model: with Timer() as initial_model_fitting_timer: for tag, model in self._models.items(): - dataset = datasets[tag] + if LocalTag.from_tag(tag).is_local or tag in datasets: + tags = [ + tag, + LocalTag.from_tag(tag).global_tag, + ] # Prefer local dataset if available. + _, dataset = get_value_for_tag(datasets, tags) + else: + # Global model. If global dataset does not exist, create + # one by concatenating all the local datasets. + dataset = stack_datasets( + get_values_for_tag_prefix(datasets, tag) + ) + assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) @@ -256,10 +274,14 @@ def datasets(self) -> Mapping[Tag, Dataset]: @property def dataset(self) -> Dataset: """The current dataset when there is just one dataset.""" - if len(self.datasets) == 1: - return next(iter(self.datasets.values())) + # Ignore local datasets. + datasets: Mapping[Tag, Dataset] = dict( + filter(lambda item: not LocalTag.from_tag(item[0]).is_local, self.datasets.items()) + ) + if len(datasets) == 1: + return next(iter(datasets.values())) else: - raise ValueError(f"Expected a single dataset, found {len(self.datasets)}") + raise ValueError(f"Expected a single dataset, found {len(datasets)}") @property def models(self) -> Mapping[Tag, TrainableProbabilisticModelType]: @@ -279,10 +301,14 @@ def models(self, models: Mapping[Tag, TrainableProbabilisticModelType]) -> None: @property def model(self) -> TrainableProbabilisticModel: """The current model when there is just one model.""" - if len(self.models) == 1: - return next(iter(self.models.values())) + # Ignore local models. + models: Mapping[Tag, TrainableProbabilisticModel] = dict( + filter(lambda item: not LocalTag.from_tag(item[0]).is_local, self.models.items()) + ) + if len(models) == 1: + return next(iter(models.values())) else: - raise ValueError(f"Expected a single model, found {len(self.models)}") + raise ValueError(f"Expected a single model, found {len(models)}") @model.setter def model(self, model: TrainableProbabilisticModelType) -> None: @@ -427,12 +453,23 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None: f"match dataset keys {self._datasets.keys()}" ) - for tag in self._datasets: - self._datasets[tag] += new_data[tag] + self._datasets = self._acquisition_rule.update_datasets(self._datasets, new_data) with Timer() as model_fitting_timer: for tag, model in self._models.items(): - dataset = self._datasets[tag] + if LocalTag.from_tag(tag).is_local or tag in self._datasets: + tags = [ + tag, + LocalTag.from_tag(tag).global_tag, + ] # Prefer local dataset if available. + _, dataset = get_value_for_tag(self._datasets, tags) + else: + # Global model. If global dataset does not exist, create + # one by concatenating all the local datasets. 
+ dataset = stack_datasets( + get_values_for_tag_prefix(self._datasets, tag) + ) + assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index 0abf5455eb..4d928edd00 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -55,6 +55,7 @@ from . import logging from .acquisition.rule import TURBO, AcquisitionRule, EfficientGlobalOptimization +from .acquisition.utils import stack_datasets from .data import Dataset from .models import SupportsCovarianceWithTopFidelity, TrainableProbabilisticModel from .objectives.utils import mk_batch_observer @@ -62,7 +63,7 @@ from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Err, Ok, Result, Timer -from .utils.misc import get_value_for_tag +from .utils.misc import LocalTag, get_value_for_tag, get_values_for_tag_prefix StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -99,7 +100,9 @@ class Record(Generic[StateType]): def dataset(self) -> Dataset: """The dataset when there is just one dataset.""" # Ignore local datasets. - datasets = dict(filter(lambda item: "__" not in item[0], self.datasets.items())) + datasets: Mapping[Tag, Dataset] = dict( + filter(lambda item: not LocalTag.from_tag(item[0]).is_local, self.datasets.items()) + ) if len(datasets) == 1: return next(iter(datasets.values())) else: @@ -109,7 +112,9 @@ def dataset(self) -> Dataset: def model(self) -> TrainableProbabilisticModel: """The model when there is just one dataset.""" # Ignore local models. - models = dict(filter(lambda item: "__" not in item[0], self.models.items())) + models: Mapping[Tag, TrainableProbabilisticModel] = dict( + filter(lambda item: not LocalTag.from_tag(item[0]).is_local, self.models.items()) + ) if len(models) == 1: return next(iter(models.values())) else: @@ -233,7 +238,9 @@ def try_get_final_dataset(self) -> Dataset: """ datasets = self.try_get_final_datasets() # Ignore local datasets. - datasets = dict(filter(lambda item: "__" not in item[0], datasets.items())) + datasets = dict( + filter(lambda item: not LocalTag.from_tag(item[0]).is_local, datasets.items()) + ) if len(datasets) == 1: return next(iter(datasets.values())) else: @@ -278,7 +285,7 @@ def try_get_final_model(self) -> TrainableProbabilisticModel: """ models = self.try_get_final_models() # Ignore local models. - models = dict(filter(lambda item: "__" not in item[0], models.items())) + models = dict(filter(lambda item: not LocalTag.from_tag(item[0]).is_local, models.items())) if len(models) == 1: return next(iter(models.values())) else: @@ -638,7 +645,7 @@ def optimize( if isinstance(datasets, Dataset): datasets = {OBJECTIVE: datasets} if not isinstance(models, Mapping): - models = {OBJECTIVE: models} # type: ignore[dict-item] + models = {OBJECTIVE: models} # reassure the type checker that everything is tagged datasets = cast(Dict[Tag, Dataset], datasets) @@ -647,9 +654,9 @@ def optimize( if num_steps < 0: raise ValueError(f"num_steps must be at least 0, got {num_steps}") - # Get set of dataset keys, ignoring suffix starting with double underscore. - datasets_keys = {tag.split("__")[0] for tag in datasets.keys()} - models_keys = {tag.split("__")[0] for tag in models.keys()} + # Get set of dataset and model keys, ignoring any local tag index. 
+ datasets_keys = {LocalTag.from_tag(tag).global_tag for tag in datasets.keys()} + models_keys = {LocalTag.from_tag(tag).global_tag for tag in models.keys()} if datasets_keys != models_keys: raise ValueError( f"datasets and models should contain the same keys. Got {datasets_keys} and" @@ -731,8 +738,17 @@ def optimize( if step == 1 and fit_model and fit_initial_model: with Timer() as initial_model_fitting_timer: for tag, model in models.items(): - tags = [tag, tag.split("__")[0]] # Prefer local dataset if available. - dataset = get_value_for_tag(datasets, tags) + if LocalTag.from_tag(tag).is_local or tag in datasets: + tags = [ + tag, + LocalTag.from_tag(tag).global_tag, + ] # Prefer local dataset if available. + _, dataset = get_value_for_tag(datasets, tags) + else: + # Global model. If global dataset does not exist, create + # one by concatenating all the local datasets. + dataset = stack_datasets(get_values_for_tag_prefix(datasets, tag)) + assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) if summary_writer: @@ -756,11 +772,7 @@ def optimize( observer = self._observer # If query_points are rank 3, then use a batched observer. if tf.rank(query_points) == 3: - num_objective_models = len( - [tag for tag in models if "__" in tag and tag.split("__")[0] == OBJECTIVE] - ) - num_objective_models = max(num_objective_models, 1) - observer = mk_batch_observer(observer, num_objective_models, OBJECTIVE) + observer = mk_batch_observer(observer, OBJECTIVE) observer_output = observer(query_points) tagged_output = ( @@ -774,8 +786,19 @@ def optimize( with Timer() as model_fitting_timer: if fit_model: for tag, model in models.items(): - tags = [tag, tag.split("__")[0]] # Prefer local dataset if available. - dataset = get_value_for_tag(datasets, tags) + if LocalTag.from_tag(tag).is_local or tag in datasets: + tags = [ + tag, + LocalTag.from_tag(tag).global_tag, + ] # Prefer local dataset if available. + _, dataset = get_value_for_tag(datasets, tags) + else: + # Global model. If global dataset does not exist, create + # one by concatenating all the local datasets. + dataset = stack_datasets( + get_values_for_tag_prefix(datasets, tag) + ) + assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index 69714c8686..f177d34cd9 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -18,14 +18,17 @@ """ from __future__ import annotations -import tensorflow as tf from collections.abc import Callable from typing import Mapping, Optional, Union, overload +import tensorflow as tf +from check_shapes import check_shapes + from ..data import Dataset -from ..observer import OBJECTIVE, MultiObserver, Observer, SingleObserver +from ..observer import MultiObserver, Observer, SingleObserver from ..types import Tag, TensorType +from ..utils.misc import LocalTag @overload @@ -62,7 +65,6 @@ def mk_multi_observer(**kwargs: Callable[[TensorType], TensorType]) -> MultiObse def mk_batch_observer( objective_or_observer: Union[Callable[[TensorType], TensorType], SingleObserver], - batch_size: int, key: Optional[Tag] = None, ) -> Observer: """ @@ -71,38 +73,46 @@ def mk_batch_observer( :param objective_or_observer: An objective or an existing observer designed to be used with a single data set and model. - :param batch_size: The batch size of the observer. :param key: An optional key to use to access the data from the observer result. 
:return: A multi-observer across the batch dimension of query points, returning the data from ``objective``. If ``key`` is provided, the observer will be a mapping. Otherwise, it will return a single dataset. + :raise ValueError (or tf.errors.InvalidArgumentError): If the query points are not rank 3. + :raise ValueError (or tf.errors.InvalidArgumentError): If ``objective_or_observer`` is a + multi-observer. """ + @check_shapes("qps: [n_points, batch_size, n_dims]") + # Note that the return type is not correct, but that is what mypy is happy with. It should be + # Mapping[Tag, Dataset] if key is not None, otherwise Dataset. + # One solution is to create two separate functions, but that will result in some duplicate code. def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: - assert tf.rank(qps) == 3, ( - f"query points must be rank 3 for batch observer, got {tf.rank(qps)}" - ) - # Call objective with rank 2 query points by flattening batch dimension. # Some objectives might only expect rank 2 query points, so this is safer. + batch_size = qps.shape[1] qps = tf.reshape(qps, [-1, qps.shape[-1]]) obs_or_dataset = objective_or_observer(qps) - if not isinstance(obs_or_dataset, Dataset): + if isinstance(obs_or_dataset, Mapping): + raise ValueError("mk_batch_observer does not support multi-observers") + elif not isinstance(obs_or_dataset, Dataset): obs_or_dataset = Dataset(qps, obs_or_dataset) if key is None: # Always use rank 2 shape as models (e.g. GPR) expect this, so return as is. return obs_or_dataset else: - # Include overall dataset and per batch dataset. - obs = obs_or_dataset.observations - qps = tf.reshape(qps, [-1, batch_size, qps.shape[-1]]) - obs = tf.reshape(obs, [-1, batch_size, obs.shape[-1]]) - datasets = { - OBJECTIVE: obs_or_dataset, - **{f"{key}__{i}": Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)} - } - return datasets + if batch_size == 1: + # If batch size is 1, just return the dataset as is, i.e. use the global dataset. + return {key: obs_or_dataset} + else: + # Include per batch dataset. + obs = obs_or_dataset.observations + qps = tf.reshape(qps, [-1, batch_size, qps.shape[-1]]) + obs = tf.reshape(obs, [-1, batch_size, obs.shape[-1]]) + datasets: Mapping[Tag, Dataset] = { + **{LocalTag(key, i): Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)} + } + return datasets return _observer diff --git a/trieste/utils/misc.py b/trieste/utils/misc.py index a42a9c114c..7625b06c9d 100644 --- a/trieste/utils/misc.py +++ b/trieste/utils/misc.py @@ -14,9 +14,23 @@ from __future__ import annotations from abc import ABC, abstractmethod +from dataclasses import dataclass from time import perf_counter from types import TracebackType -from typing import Any, Callable, Generic, Mapping, NoReturn, Optional, Sequence, Tuple, Type, TypeVar, Union +from typing import ( + Any, + Callable, + Generic, + List, + Mapping, + NoReturn, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, +) import numpy as np import tensorflow as tf @@ -222,12 +236,13 @@ def map_values(f: Callable[[U], V], mapping: Mapping[K, U]) -> Mapping[K, V]: def get_value_for_tag( mapping: Optional[Mapping[Tag, T]], tags: Union[Tag, Sequence[Tag]] = OBJECTIVE -) -> Optional[T]: +) -> Tuple[Optional[Tag], Optional[T]]: """Return the value from a mapping for the first tag found from a sequence of tags. :param mapping: A mapping from tags to values. :param tags: A tag or a sequence of tags. Sequence is searched in order. - :return: The value of the tag in the mapping, or None if the mapping is None. 
+ :return: The chosen tag and value of the tag in the mapping, or None for each if the mapping is + None. :raises ValueError: If none of the tags are in the mapping and the mapping is not None. """ @@ -235,13 +250,75 @@ def get_value_for_tag( tags = [tags] if mapping is None: - return None - elif matched_tags := sorted(set(tags) & set(mapping.keys()), key = tags.index): - return mapping[matched_tags[0]] + return None, None + elif matched_tags := sorted(set(tags) & set(mapping.keys()), key=tags.index): + return matched_tags[0], mapping[matched_tags[0]] else: raise ValueError(f"none of the tags '{tags}' found in mapping") +@dataclass(frozen=True) +class LocalTag: + """Manage a tag for a local model or dataset.""" + + global_tag: Tag + local_index: Optional[int] + + def __post_init__(self) -> None: + if self.is_local and (self.local_index is None or self.local_index < 0): + raise ValueError("local index must be non-negative") + + @property + def is_local(self) -> bool: + """Return True if the tag is a local tag.""" + return self.local_index is not None + + @property + def tag(self) -> Tag: + """The local tag.""" + if self.is_local: + return f"{self.global_tag}__{self.local_index}" + else: + return self.global_tag + + def __str__(self) -> str: + """Return the local tag.""" + return str(self.tag) + + def __hash__(self) -> int: + """Return the hash of the local tag.""" + return hash(self.tag) + + def __eq__(self, other: object) -> bool: + """Return True if the local tag is equal to the other object.""" + return isinstance(other, LocalTag) and self.tag == other.tag + + @staticmethod + def from_tag(tag: Tag) -> LocalTag: + """Return a LocalTag from a given tag.""" + tag = str(tag) + if "__" in tag: + global_tag, _local_index = tag.split("__") + local_index = int(_local_index) + else: + global_tag, local_index = tag, None + return LocalTag(global_tag, local_index) + + +def get_values_for_tag_prefix(mapping: Mapping[Tag, T], tag_prefix: Tag = OBJECTIVE) -> List[T]: + """ + Return a mapping from tags to values for all tags in ``mapping`` that start with ``tag_prefix``. + + :param mapping: A mapping from tags to values. + :param tag_prefix: A tag prefix. + :return: A list of values from ``mapping`` for all tags in ``mapping`` that start with + ``tag_prefix``. + """ + return [ + value for tag, value in mapping.items() if LocalTag.from_tag(tag).global_tag == tag_prefix + ] + + class Timer: """ Functionality for timing chunks of code. For example: From 2de62719df5dbaa06f8f3ad24cf4f98b09cc2ee4 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Fri, 6 Oct 2023 17:26:40 +0100 Subject: [PATCH 06/33] Always include global dataset in mapping --- tests/unit/acquisition/test_rule.py | 13 ++++++++- tests/unit/objectives/test_utils.py | 12 +++++--- tests/unit/test_bayesian_optimizer.py | 14 ++-------- trieste/acquisition/rule.py | 28 +++++++++++++------ trieste/ask_tell_optimization.py | 34 +++++------------------ trieste/bayesian_optimizer.py | 31 +++++---------------- trieste/objectives/utils.py | 5 ++-- trieste/utils/misc.py | 40 +++++++++++---------------- 8 files changed, 76 insertions(+), 101 deletions(-) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index c03df12e31..a6322e5e2a 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -1641,6 +1641,7 @@ def prepare_acquisition_function( ), ( { + OBJECTIVE: mk_dataset([[-1.0]], [[-1.0]]), # Should be ignored. 
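+                # (each region prefers its local dataset, so this global entry goes unused)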
"OBJECTIVE__0": mk_dataset([[0.0]], [[1.0]]), "OBJECTIVE__1": mk_dataset([[1.0]], [[1.0]]), "OBJECTIVE__2": mk_dataset([[2.0]], [[1.0]]), @@ -1658,7 +1659,7 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( # Non-overlapping regions. subspaces = [ TestTrustRegionBox(tf.constant([i], dtype=tf.float64), search_space, i, init_eps=0.4) - for i in range(3) + for i in range(num_local_models) ] models = copy_to_local_models(QuadraticMeanAndRBFKernel(), num_local_models) base_rule = EfficientGlobalOptimization( # type: ignore[var-annotated] @@ -1671,6 +1672,8 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( new_data = observer(points) assert not isinstance(new_data, Dataset) datasets = rule.update_datasets(datasets, new_data) + + # Check local datasets. for i, subspace in enumerate(subspaces): assert ( datasets[f"OBJECTIVE__{i}"].query_points.shape[0] @@ -1678,6 +1681,14 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( ) assert np.all(subspace.contains(datasets[f"OBJECTIVE__{i}"].query_points)) + # Check global dataset. + assert datasets[OBJECTIVE].query_points.shape[0] == num_local_models * ( + exp_num_init_points + num_query_points_per_region + ) + # Each point should be in at least one region. + for point in datasets[OBJECTIVE].query_points: + assert any(subspace.contains(point) for subspace in subspaces) + def test_multi_trust_region_box_state_deepcopy() -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) diff --git a/tests/unit/objectives/test_utils.py b/tests/unit/objectives/test_utils.py index 1c10301cac..8826a7166a 100644 --- a/tests/unit/objectives/test_utils.py +++ b/tests/unit/objectives/test_utils.py @@ -86,11 +86,15 @@ def test_mk_batch_observer( else: assert isinstance(ys, dict) if batch_size == 1: - assert ys.keys() == {key} - npt.assert_array_equal(ys[key].query_points, x_[:, 0]) - npt.assert_array_equal(ys[key].observations, x_[:, 0]) + exp_keys = {key} else: - assert ys.keys() == {f"{key}__{i}" for i in range(batch_size)} + exp_keys = {f"{key}__{i}" for i in range(batch_size)} + exp_keys.add(key) + + assert ys.keys() == exp_keys + npt.assert_array_equal(ys[key].query_points, tf.reshape(x_, [-1, 1])) + npt.assert_array_equal(ys[key].observations, tf.reshape(x_, [-1, 1])) + if batch_size > 1: for i in range(batch_size): npt.assert_array_equal(ys[f"{key}__{i}"].query_points, x_[:, i]) npt.assert_array_equal(ys[f"{key}__{i}"].observations, x_[:, i]) diff --git a/tests/unit/test_bayesian_optimizer.py b/tests/unit/test_bayesian_optimizer.py index 90135d90f7..5a4e7250c4 100644 --- a/tests/unit/test_bayesian_optimizer.py +++ b/tests/unit/test_bayesian_optimizer.py @@ -252,6 +252,7 @@ def test_bayesian_optimizer_creates_correct_datasets_for_rank3_points( LocalTag(OBJECTIVE, i): mk_dataset([[0.5 + i], [1.5 + i]], [[0.25], [0.35]]) for i in range(batch_size) } + init_data[OBJECTIVE] = mk_dataset([[0.5], [1.5]], [[0.25], [0.35]]) query_points = tf.reshape( tf.constant(range(batch_size * num_query_points_per_batch), tf.float64), @@ -266,11 +267,7 @@ def __init__(self) -> None: def update(self, dataset: Dataset) -> None: if use_global_model: - if use_global_init_dataset: - exp_init_qps = init_data[OBJECTIVE].query_points - else: - exp_init_qps = tf.stack([data.query_points for data in init_data.values()], 1) - exp_init_qps = tf.reshape(exp_init_qps, [-1, 1]) + exp_init_qps = init_data[OBJECTIVE].query_points else: if use_global_init_dataset: exp_init_qps = init_data[OBJECTIVE].query_points @@ -283,12 +280,7 @@ def update(self, dataset: 
Dataset) -> None: else: # Subsequent model training. if use_global_model: - if use_global_init_dataset: - _exp_init_qps = tf.tile(exp_init_qps[:, None], [1, batch_size, 1]) - else: - _exp_init_qps = tf.reshape(exp_init_qps, (-1, batch_size, 1)) - exp_qps = tf.concat([_exp_init_qps, query_points], 0) - exp_qps = tf.reshape(exp_qps, [-1, 1]) + exp_qps = tf.concat([exp_init_qps, tf.reshape(query_points, [-1, 1])], 0) else: index = LocalTag.from_tag(self._tag).local_index exp_qps = tf.concat([exp_init_qps, query_points[:, index]], 0) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index df6e504398..019ae94953 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -73,7 +73,7 @@ batchify_vectorize, ) from .sampler import ExactThompsonSampler, ThompsonSampler -from .utils import get_local_dataset, get_unique_points_mask, select_nth_output +from .utils import get_local_dataset, get_unique_points_mask, select_nth_output, stack_datasets ResultType = TypeVar("ResultType", covariant=True) """ Unbound covariant type variable. """ @@ -167,16 +167,12 @@ def update_datasets( # Otherwise keep the existing dataset from datasets. # TODO: this could mean that when we have a global model, the global dataset # can contain multiple copies of the initial dataset. - # datasets = { - # tag: get_value_for_tag(datasets, [tag, tag.split("__")[0]])[1] + new_datasets[tag] - # for tag in new_datasets - # } - datasets = {} + updated_datasets = {} for tag in new_datasets: _, dataset = get_value_for_tag(datasets, [tag, LocalTag.from_tag(tag).global_tag]) assert dataset is not None - datasets[tag] = dataset + new_datasets[tag] - return datasets + updated_datasets[tag] = dataset + new_datasets[tag] + return updated_datasets class EfficientGlobalOptimization( @@ -1270,12 +1266,28 @@ def update_datasets( used_masks[tag] = tf.logical_or(used_masks[tag], in_region) filtered_datasets = {} + global_tags = [] # Global datasets to re-generate. for tag, used_mask in used_masks.items(): filtered_datasets[tag] = Dataset( tf.boolean_mask(datasets[tag].query_points, used_mask), tf.boolean_mask(datasets[tag].observations, used_mask), ) + ltag = LocalTag.from_tag(tag) + if ltag.is_local and ltag.global_tag not in global_tags: + global_tags.append(ltag.global_tag) + + # Include global datasets. + for gtag in global_tags: + # Create global dataset from local datasets. This is done by stacking the local + # datasets. + local_datasets = [ + value + for tag, value in filtered_datasets.items() + if (ltag := LocalTag.from_tag(tag)).is_local and ltag.global_tag == gtag + ] + filtered_datasets[gtag] = stack_datasets(local_datasets) + return filtered_datasets diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py index c0f7e627a5..198618d5eb 100644 --- a/trieste/ask_tell_optimization.py +++ b/trieste/ask_tell_optimization.py @@ -32,7 +32,6 @@ from . 
import logging from .acquisition.rule import TURBO, AcquisitionRule, EfficientGlobalOptimization -from .acquisition.utils import stack_datasets from .bayesian_optimizer import ( FrozenRecord, OptimizationResult, @@ -44,12 +43,11 @@ ) from .data import Dataset from .models import TrainableProbabilisticModel -from .objectives.utils import mk_batch_observer from .observer import OBJECTIVE from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Ok, Timer -from .utils.misc import LocalTag, get_value_for_tag, get_values_for_tag_prefix +from .utils.misc import LocalTag, get_value_for_tag StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -237,18 +235,9 @@ def __init__( if fit_model: with Timer() as initial_model_fitting_timer: for tag, model in self._models.items(): - if LocalTag.from_tag(tag).is_local or tag in datasets: - tags = [ - tag, - LocalTag.from_tag(tag).global_tag, - ] # Prefer local dataset if available. - _, dataset = get_value_for_tag(datasets, tags) - else: - # Global model. If global dataset does not exist, create - # one by concatenating all the local datasets. - dataset = stack_datasets( - get_values_for_tag_prefix(datasets, tag) - ) + # Prefer local dataset if available. + tags = [tag, LocalTag.from_tag(tag).global_tag] + _, dataset = get_value_for_tag(datasets, tags) assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) @@ -457,18 +446,9 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None: with Timer() as model_fitting_timer: for tag, model in self._models.items(): - if LocalTag.from_tag(tag).is_local or tag in self._datasets: - tags = [ - tag, - LocalTag.from_tag(tag).global_tag, - ] # Prefer local dataset if available. - _, dataset = get_value_for_tag(self._datasets, tags) - else: - # Global model. If global dataset does not exist, create - # one by concatenating all the local datasets. - dataset = stack_datasets( - get_values_for_tag_prefix(self._datasets, tag) - ) + # Prefer local dataset if available. + tags = [tag, LocalTag.from_tag(tag).global_tag] + _, dataset = get_value_for_tag(self._datasets, tags) assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index 4d928edd00..212612f696 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -55,7 +55,6 @@ from . import logging from .acquisition.rule import TURBO, AcquisitionRule, EfficientGlobalOptimization -from .acquisition.utils import stack_datasets from .data import Dataset from .models import SupportsCovarianceWithTopFidelity, TrainableProbabilisticModel from .objectives.utils import mk_batch_observer @@ -63,7 +62,7 @@ from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Err, Ok, Result, Timer -from .utils.misc import LocalTag, get_value_for_tag, get_values_for_tag_prefix +from .utils.misc import LocalTag, get_value_for_tag StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -738,16 +737,9 @@ def optimize( if step == 1 and fit_model and fit_initial_model: with Timer() as initial_model_fitting_timer: for tag, model in models.items(): - if LocalTag.from_tag(tag).is_local or tag in datasets: - tags = [ - tag, - LocalTag.from_tag(tag).global_tag, - ] # Prefer local dataset if available. - _, dataset = get_value_for_tag(datasets, tags) - else: - # Global model. 
If global dataset does not exist, create - # one by concatenating all the local datasets. - dataset = stack_datasets(get_values_for_tag_prefix(datasets, tag)) + # Prefer local dataset if available. + tags = [tag, LocalTag.from_tag(tag).global_tag] + _, dataset = get_value_for_tag(datasets, tags) assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) @@ -786,18 +778,9 @@ def optimize( with Timer() as model_fitting_timer: if fit_model: for tag, model in models.items(): - if LocalTag.from_tag(tag).is_local or tag in datasets: - tags = [ - tag, - LocalTag.from_tag(tag).global_tag, - ] # Prefer local dataset if available. - _, dataset = get_value_for_tag(datasets, tags) - else: - # Global model. If global dataset does not exist, create - # one by concatenating all the local datasets. - dataset = stack_datasets( - get_values_for_tag_prefix(datasets, tag) - ) + # Prefer local dataset if available. + tags = [tag, LocalTag.from_tag(tag).global_tag] + _, dataset = get_value_for_tag(datasets, tags) assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index f177d34cd9..1d32fe61d9 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -106,12 +106,13 @@ def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: # If batch size is 1, just return the dataset as is, i.e. use the global dataset. return {key: obs_or_dataset} else: - # Include per batch dataset. + # Include overall dataset and per batch dataset. obs = obs_or_dataset.observations qps = tf.reshape(qps, [-1, batch_size, qps.shape[-1]]) obs = tf.reshape(obs, [-1, batch_size, obs.shape[-1]]) datasets: Mapping[Tag, Dataset] = { - **{LocalTag(key, i): Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)} + key: obs_or_dataset, + **{LocalTag(key, i): Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)}, } return datasets diff --git a/trieste/utils/misc.py b/trieste/utils/misc.py index 7625b06c9d..88c1823513 100644 --- a/trieste/utils/misc.py +++ b/trieste/utils/misc.py @@ -21,7 +21,6 @@ Any, Callable, Generic, - List, Mapping, NoReturn, Optional, @@ -281,42 +280,35 @@ def tag(self) -> Tag: else: return self.global_tag + def __repr__(self) -> str: + """Return the local tag.""" + return f"LocalTag({self.global_tag}, {self.local_index})" + def __str__(self) -> str: """Return the local tag.""" return str(self.tag) def __hash__(self) -> int: - """Return the hash of the local tag.""" + """Return the hash of the overall tag.""" return hash(self.tag) def __eq__(self, other: object) -> bool: """Return True if the local tag is equal to the other object.""" - return isinstance(other, LocalTag) and self.tag == other.tag + return hash(self) == hash(other) @staticmethod - def from_tag(tag: Tag) -> LocalTag: + def from_tag(tag: Union[Tag, LocalTag]) -> LocalTag: """Return a LocalTag from a given tag.""" - tag = str(tag) - if "__" in tag: - global_tag, _local_index = tag.split("__") - local_index = int(_local_index) + if isinstance(tag, LocalTag): + return tag else: - global_tag, local_index = tag, None - return LocalTag(global_tag, local_index) - - -def get_values_for_tag_prefix(mapping: Mapping[Tag, T], tag_prefix: Tag = OBJECTIVE) -> List[T]: - """ - Return a mapping from tags to values for all tags in ``mapping`` that start with ``tag_prefix``. - - :param mapping: A mapping from tags to values. - :param tag_prefix: A tag prefix. 
- :return: A list of values from ``mapping`` for all tags in ``mapping`` that start with - ``tag_prefix``. - """ - return [ - value for tag, value in mapping.items() if LocalTag.from_tag(tag).global_tag == tag_prefix - ] + tag = str(tag) + if "__" in tag: + global_tag, _local_index = tag.split("__") + local_index = int(_local_index) + else: + global_tag, local_index = tag, None + return LocalTag(global_tag, local_index) class Timer: From 6236c95e0c5cec43a6e4f4a4aefc60d4f1742552 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Fri, 6 Oct 2023 18:02:57 +0100 Subject: [PATCH 07/33] Add filter_mask method to trust region --- tests/unit/acquisition/test_rule.py | 31 ++++++++++++----------------- trieste/acquisition/rule.py | 24 +++++++++++++++++++--- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index a6322e5e2a..b26480c448 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -1193,22 +1193,8 @@ def test_trust_region_box_get_dataset_min() -> None: trb._lower = tf.constant([0.2, 0.2], dtype=tf.float64) trb._upper = tf.constant([0.7, 0.7], dtype=tf.float64) x_min, y_min = trb.get_dataset_min(dataset) - npt.assert_array_equal(x_min, tf.constant([0.3, 0.4], dtype=tf.float64)) - npt.assert_array_equal(y_min, tf.constant([0.2], dtype=tf.float64)) - - -# get_dataset_min returns first x value and inf y value when points in dataset are outside the -# search space. -def test_trust_region_box_get_dataset_min_outside_search_space() -> None: - search_space = Box([0.0, 0.0], [1.0, 1.0]) - dataset = Dataset( - tf.constant([[1.2, 1.3], [-0.4, -0.5]], dtype=tf.float64), - tf.constant([[0.7], [0.9]], dtype=tf.float64), - ) - trb = SingleObjectiveTrustRegionBox(search_space) - x_min, y_min = trb.get_dataset_min(dataset) - npt.assert_array_equal(x_min, tf.constant([1.2, 1.3], dtype=tf.float64)) - npt.assert_array_equal(y_min, tf.constant([np.inf], dtype=tf.float64)) + npt.assert_array_equal(x_min, tf.constant([0.1, 0.1], dtype=tf.float64)) + npt.assert_array_equal(y_min, tf.constant([0.0], dtype=tf.float64)) # Initialize sets the box to a random location, and sets the eps and y_min values. @@ -1619,8 +1605,8 @@ def prepare_acquisition_function( npt.assert_array_equal(points.shape, [num_query_points_per_region, num_regions, 2]) - # Each region should find the minimum of its local model, which will be the center of the - # region. + # Each region should find the minimum of its local model, which will be the center of + # the region. exp_points = tf.stack([base_shift + i for i in range(num_regions)]) exp_points = tf.tile(exp_points[None, :, :], [num_query_points_per_region, 1, 1]) npt.assert_allclose(points, exp_points) @@ -1688,6 +1674,15 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( # Each point should be in at least one region. for point in datasets[OBJECTIVE].query_points: assert any(subspace.contains(point) for subspace in subspaces) + # Global dataset should be the concatenation of all local datasets. 
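+    # The local datasets are stacked along a new axis and flattened, so their points
+    # appear interleaved in the global dataset.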
+ exp_query_points = tf.reshape( + tf.stack( + [datasets[f"OBJECTIVE__{i}"].query_points for i in range(num_local_models)], + axis=1, + ), + [-1, 1], + ) + npt.assert_array_almost_equal(datasets[OBJECTIVE].query_points, exp_query_points) def test_multi_trust_region_box_state_deepcopy() -> None: diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 019ae94953..57da8d603a 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -1024,6 +1024,18 @@ def select_dataset( # By default return the OBJECTIVE dataset. return get_value_for_tag(datasets, OBJECTIVE) + def get_dataset_filter_mask(self, dataset: Dataset) -> tf.Tensor: + """ + Return a boolean mask that can be used to filter out points from the given dataset that + do not belong to this region. + + :param dataset: The dataset to filter. + :return: A boolean mask that can be used to filter the given dataset. A value of `True` + indicates that the corresponding point should be kept. + """ + # By default return a mask that filters nothing. + return tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) + UpdatableTrustRegionType = TypeVar("UpdatableTrustRegionType", bound=UpdatableTrustRegion) """ A type variable bound to :class:`UpdatableTrustRegion`. """ @@ -1260,9 +1272,7 @@ def update_datasets( for subspace in self._init_subspaces: tag, space_dataset = subspace.select_dataset(datasets) assert space_dataset is not None - in_region = subspace.contains(space_dataset.query_points) - # Assign slice of used_masks on axis 1 at index from values in - # in_region, i.e. ued_mask[index] = in_region + in_region = subspace.get_dataset_filter_mask(space_dataset) used_masks[tag] = tf.logical_or(used_masks[tag], in_region) filtered_datasets = {} @@ -1412,6 +1422,10 @@ def select_dataset( ] # Prefer local dataset if available. return get_value_for_tag(datasets, tags) + def get_dataset_filter_mask(self, dataset: Dataset) -> tf.Tensor: + # Only keep points that are in the box. + return self.contains(dataset.query_points) + @check_shapes( "return[0]: [D]", "return[1]: []", @@ -1553,6 +1567,10 @@ def initialize( super().initialize(models, datasets) + def get_dataset_filter_mask(self, dataset: Dataset) -> tf.Tensor: + # Don't filter out any points from the dataset. 
+ return tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) + class TURBO( AcquisitionRule[ From e96ebe8e88f4815d4c7ba54632725034b568ee57 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Mon, 9 Oct 2023 18:14:22 +0100 Subject: [PATCH 08/33] Add more testing --- docs/notebooks/trust_region.pct.py | 70 ++++++++--- .../integration/test_bayesian_optimization.py | 60 +++++++-- tests/unit/acquisition/test_rule.py | 57 +++++---- tests/unit/objectives/test_utils.py | 14 +-- trieste/acquisition/rule.py | 118 ++++++++++++------ trieste/acquisition/utils.py | 21 +--- trieste/ask_tell_optimization.py | 7 +- trieste/bayesian_optimizer.py | 9 +- trieste/objectives/utils.py | 22 ++-- 9 files changed, 247 insertions(+), 131 deletions(-) diff --git a/docs/notebooks/trust_region.pct.py b/docs/notebooks/trust_region.pct.py index 195168ddcd..589b535360 100644 --- a/docs/notebooks/trust_region.pct.py +++ b/docs/notebooks/trust_region.pct.py @@ -106,7 +106,9 @@ def build_model(): from trieste.experimental.plotting import plot_bo_points, plot_function_2d -def plot_final_result(_dataset: trieste.data.Dataset) -> None: +def plot_final_result( + _dataset: trieste.data.Dataset, num_init_points=num_initial_data_points +) -> None: arg_min_idx = tf.squeeze(tf.argmin(_dataset.observations, axis=0)) query_points = _dataset.query_points.numpy() _, ax = plot_function_2d( @@ -117,7 +119,7 @@ def plot_final_result(_dataset: trieste.data.Dataset) -> None: contour=True, ) - plot_bo_points(query_points, ax[0, 0], num_initial_data_points, arg_min_idx) + plot_bo_points(query_points, ax[0, 0], num_init_points, arg_min_idx) plot_final_result(dataset) @@ -144,7 +146,10 @@ def plot_final_result(_dataset: trieste.data.Dataset) -> None: ) -def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: +def plot_history( + result: trieste.bayesian_optimizer.OptimizationResult, + num_init_points=num_initial_data_points, +) -> None: frames = [] for step, hist in enumerate( result.history + [result.final_result.unwrap()] @@ -154,7 +159,7 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: search_space.lower, search_space.upper, hist, - num_init=num_initial_data_points, + num_init=num_init_points, ) if fig is not None: @@ -203,15 +208,15 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: # contribute `Q` query points to the overall batch. 
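# %% [markdown]
# As a rough illustration of the shape convention described above (a sketch with
# hypothetical sizes `Q`, `R` and `D`, not values taken from this notebook): batch
# trust region rules return rank-3 query points, one batch of `Q` points per region,
# which are flattened back to rank 2 when the objective is queried.

# %%
Q, R, D = 2, 5, 2  # points per region, number of regions, input dimension
example_points = tf.zeros([Q, R, D])  # rank-3 points, as returned by a batch rule
flat_points = tf.reshape(example_points, [-1, D])  # rank-2 view seen by the observer
assert flat_points.shape == [Q * R, D]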
# %% -num_query_points = 6 +num_query_points = 5 init_subspaces = [ - trieste.acquisition.rule.SingleObjectiveTrustRegionBox(search_space, i) - for i in range(num_query_points) + trieste.acquisition.rule.SingleObjectiveTrustRegionBox(search_space) + for _ in range(num_query_points) ] base_rule = trieste.acquisition.rule.EfficientGlobalOptimization( # type: ignore[var-annotated] builder=trieste.acquisition.ParallelContinuousThompsonSampling(), - # num_query_points=num_query_points, + num_query_points=num_query_points, ) batch_acq_rule = trieste.acquisition.rule.BatchTrustRegionBox( init_subspaces, base_rule @@ -230,12 +235,7 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: num_steps = 5 result = bo.optimize( - # num_steps, initial_data, build_model(), batch_acq_rule, track_state=True - num_steps, - {trieste.observer.OBJECTIVE: initial_data}, - trieste.acquisition.utils.copy_to_local_models(build_model(), 2), - batch_acq_rule, - track_state=True, + num_steps, initial_data, build_model(), batch_acq_rule, track_state=True ) dataset = result.try_get_final_dataset() @@ -245,10 +245,48 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: # We visualize the results as before. # %% -plot_final_result(dataset) +plot_final_result(dataset, num_init_points=0) # %% -plot_history(result) +plot_history(result, num_init_points=0) + +# %% [markdown] +# ## TEST + +# %% +num_query_points = 5 + +init_subspaces = [ + trieste.acquisition.rule.SingleObjectiveTrustRegionBox(search_space) + for _ in range(num_query_points) +] +base_rule = trieste.acquisition.rule.EfficientGlobalOptimization( + builder=trieste.acquisition.ParallelContinuousThompsonSampling(), + num_query_points=1, +) +batch_acq_rule = trieste.acquisition.rule.BatchTrustRegionBox( + init_subspaces, base_rule +) + +bo = trieste.bayesian_optimizer.BayesianOptimizer(observer, search_space) + +num_steps = 5 +result = bo.optimize( + num_steps, + {trieste.observer.OBJECTIVE: initial_data}, + trieste.acquisition.utils.copy_to_local_models( + build_model(), num_query_points + ), + batch_acq_rule, + track_state=True, +) +dataset = result.try_get_final_dataset() + +# %% +plot_final_result(dataset, num_init_points=0) + +# %% +plot_history(result, num_init_points=0) # %% [markdown] # ## Trust region `TurBO` acquisition rule diff --git a/tests/integration/test_bayesian_optimization.py b/tests/integration/test_bayesian_optimization.py index 591ef8b62e..c9b02591f9 100644 --- a/tests/integration/test_bayesian_optimization.py +++ b/tests/integration/test_bayesian_optimization.py @@ -58,6 +58,7 @@ TREGOBox, ) from trieste.acquisition.sampler import ThompsonSamplerFromTrajectory +from trieste.acquisition.utils import copy_to_local_models from trieste.bayesian_optimizer import ( BayesianOptimizer, FrozenRecord, @@ -98,12 +99,13 @@ # (regenerating is necessary as some of the acquisition rules are stateful). 
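# For orientation, a minimal sketch (not itself exercised by the tests) of what the new
# `num_models` column below drives: `copy_to_local_models` deep-copies a stand-in
# `global_model` into per-region models keyed with the "<global tag>__<index>" local tag
# convention, e.g.:
#
#     models = copy_to_local_models(global_model, num_local_models=3)
#     assert set(models) == {"OBJECTIVE__0", "OBJECTIVE__1", "OBJECTIVE__2"}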
def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: return ( - "num_steps, acquisition_rule", + "num_steps, acquisition_rule, num_models", [ - pytest.param(20, EfficientGlobalOptimization(), id="EfficientGlobalOptimization"), + pytest.param(20, EfficientGlobalOptimization(), 1, id="EfficientGlobalOptimization"), pytest.param( 30, EfficientGlobalOptimization(AugmentedExpectedImprovement().using(OBJECTIVE)), + 1, id="AugmentedExpectedImprovement", ), pytest.param( @@ -112,6 +114,7 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: MonteCarloExpectedImprovement(int(1e3)).using(OBJECTIVE), generate_continuous_optimizer(100), ), + 1, id="MonteCarloExpectedImprovement", ), pytest.param( @@ -122,6 +125,7 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: min_value_sampler=ThompsonSamplerFromTrajectory(sample_min_value=True), ).using(OBJECTIVE) ), + 1, id="MinValueEntropySearch", ), pytest.param( @@ -130,6 +134,7 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: BatchExpectedImprovement(sample_size=100).using(OBJECTIVE), num_query_points=3, ), + 1, id="BatchExpectedImprovement", ), pytest.param( @@ -138,10 +143,11 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: BatchMonteCarloExpectedImprovement(sample_size=500).using(OBJECTIVE), num_query_points=3, ), + 1, id="BatchMonteCarloExpectedImprovement", ), pytest.param( - 12, AsynchronousOptimization(num_query_points=3), id="AsynchronousOptimization" + 12, AsynchronousOptimization(num_query_points=3), 1, id="AsynchronousOptimization" ), pytest.param( 15, @@ -151,6 +157,7 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE), num_query_points=3, ), + 1, id="LocalPenalization", ), pytest.param( @@ -161,6 +168,7 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE), num_query_points=3, ), + 1, id="LocalPenalization/AsynchronousGreedy", ), pytest.param( @@ -171,6 +179,7 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE), num_query_points=2, ), + 1, id="GIBBON", ), pytest.param( @@ -181,11 +190,13 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE), num_query_points=3, ), + 1, id="MultipleOptimismNegativeLowerConfidenceBound", ), pytest.param( 20, BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)), + 1, id="TREGO", ), pytest.param( @@ -198,6 +209,7 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE) ), ), + 1, id="TREGO/MinValueEntropySearch", ), pytest.param( @@ -209,11 +221,13 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: num_query_points=3, ), ), + 1, id="TREGO/ParallelContinuousThompsonSampling", ), pytest.param( 10, TURBO(ScaledBranin.search_space, rule=DiscreteThompsonSampling(500, 3)), + 1, id="Turbo", ), pytest.param( @@ -225,15 +239,29 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: num_query_points=3, ), ), + 1, id="BatchTrustRegionBox", ), - pytest.param(15, DiscreteThompsonSampling(500, 5), id="DiscreteThompsonSampling"), + pytest.param( + 10, + BatchTrustRegionBox( + [SingleObjectiveTrustRegionBox(ScaledBranin.search_space) for _ in range(3)], + EfficientGlobalOptimization( + ParallelContinuousThompsonSampling(), + num_query_points=2, + ), + ), + 3, + id="BatchTrustRegionBox/LocalModels", + ), + pytest.param(15, DiscreteThompsonSampling(500, 5), 1, id="DiscreteThompsonSampling"), pytest.param( 15, EfficientGlobalOptimization( Fantasizer(), num_query_points=3, ), + 1, 
id="Fantasizer", ), pytest.param( @@ -242,6 +270,7 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: GreedyContinuousThompsonSampling(), num_query_points=5, ), + 1, id="GreedyContinuousThompsonSampling", ), pytest.param( @@ -250,11 +279,13 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ParallelContinuousThompsonSampling(), num_query_points=5, ), + 1, id="ParallelContinuousThompsonSampling", ), pytest.param( 15, BatchHypervolumeSharpeRatioIndicator() if pymoo else None, + 1, id="BatchHypevolumeSharpeRatioIndicator", marks=pytest.mark.qhsri, ), @@ -273,9 +304,14 @@ def test_bayesian_optimizer_with_gpr_finds_minima_of_scaled_branin( Box, GaussianProcessRegression, ], + num_models: int, ) -> None: _test_optimizer_finds_minimum( - GaussianProcessRegression, num_steps, acquisition_rule, optimize_branin=True + GaussianProcessRegression, + num_steps, + acquisition_rule, + optimize_branin=True, + num_models=num_models, ) @@ -289,10 +325,13 @@ def test_bayesian_optimizer_with_gpr_finds_minima_of_simple_quadratic( Box, GaussianProcessRegression, ], + num_models: int, ) -> None: # for speed reasons we sometimes test with a simple quadratic defined on the same search space # branin; currently assume that every rule should be able to solve this in 6 steps - _test_optimizer_finds_minimum(GaussianProcessRegression, min(num_steps, 6), acquisition_rule) + _test_optimizer_finds_minimum( + GaussianProcessRegression, min(num_steps, 6), acquisition_rule, num_models=num_models + ) @random_seed @@ -565,6 +604,7 @@ def _test_optimizer_finds_minimum( optimize_branin: bool = False, model_args: Optional[Mapping[str, Any]] = None, check_regret: bool = False, + num_models: int = 1, ) -> None: model_args = model_args or {} @@ -647,13 +687,19 @@ def _test_optimizer_finds_minimum( else: raise ValueError(f"Unsupported model_type '{model_type}'") + models = cast(TrainableProbabilisticModelType, model) + + if num_models > 1: + initial_data = {OBJECTIVE: initial_data} + models = copy_to_local_models(models, num_models) + with tempfile.TemporaryDirectory() as tmpdirname: summary_writer = tf.summary.create_file_writer(tmpdirname) with tensorboard_writer(summary_writer): result = BayesianOptimizer(observer, search_space).optimize( num_steps or 2, initial_data, - cast(TrainableProbabilisticModelType, model), + models, acquisition_rule, track_state=True, track_path=Path(tmpdirname) / "history", diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index b26480c448..ebe16855c6 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -554,7 +554,7 @@ def test_trego_raises_for_missing_datasets_key( ) -> None: search_space = Box([-1], [1]) rule = BatchTrustRegionBox(TREGOBox(search_space)) # type: ignore[var-annotated] - with pytest.raises(ValueError, match="tag 'OBJECTIVE' not found"): + with pytest.raises(ValueError, match="none of the tags '.LocalTag.OBJECTIVE, 0., "): rule.acquire(search_space, models, datasets=datasets)(None) @@ -594,7 +594,7 @@ def test_trego_for_default_state( assert state is not None subspace = state.acquisition_space.get_subspace("0") assert isinstance(subspace, TREGOBox) - npt.assert_array_almost_equal(query_point, expected_query_point, 5) + npt.assert_array_almost_equal(query_point, [expected_query_point], 5) npt.assert_array_almost_equal(subspace.lower, lower_bound) npt.assert_array_almost_equal(subspace.upper, upper_bound) npt.assert_array_almost_equal(subspace._y_min, [0.012]) @@ -655,7 +655,7 @@ def 
test_trego_successful_global_to_global_trust_region_unchanged( assert isinstance(current_subspace, TREGOBox) npt.assert_array_almost_equal(current_subspace._eps, eps) assert current_subspace._is_global - npt.assert_array_almost_equal(query_point, expected_query_point, 5) + npt.assert_array_almost_equal(query_point, [expected_query_point], 5) npt.assert_array_almost_equal(current_subspace.lower, lower_bound) npt.assert_array_almost_equal(current_subspace.upper, upper_bound) @@ -697,7 +697,7 @@ def test_trego_for_unsuccessful_global_to_local_trust_region_unchanged( assert not current_subspace._is_global npt.assert_array_less(lower_bound, current_subspace.lower) npt.assert_array_less(current_subspace.upper, upper_bound) - assert query_point[0] in current_state.acquisition_space + assert query_point[0][0] in current_state.acquisition_space @pytest.mark.parametrize( @@ -778,6 +778,27 @@ def test_trego_for_unsuccessful_local_to_global_trust_region_reduced( npt.assert_array_almost_equal(current_subspace.upper, upper_bound) +def test_trego_always_uses_global_dataset() -> None: + search_space = Box([0.0, 0.0], [1.0, 1.0]) + dataset = Dataset( + tf.constant([[0.1, 0.2], [-0.1, -0.2], [1.1, 2.3]]), tf.constant([[0.4], [0.5], [0.6]]) + ) + tr = BatchTrustRegionBox(TREGOBox(search_space)) # type: ignore[var-annotated] + new_data = Dataset( + tf.constant([[0.5, -0.2], [0.7, 0.2], [1.1, 0.3], [0.5, 0.5]]), + tf.constant([[0.7], [0.8], [0.9], [1.0]]), + ) + updated_datasets = tr.update_datasets({"OBJECTIVE__0": dataset}, {"OBJECTIVE__0": new_data}) + + # Both the local and global datasets should match. + assert updated_datasets.keys() == {"OBJECTIVE", "OBJECTIVE__0"} + # Updated dataset should contain all the points, including ones outside the search space. + exp_dataset = dataset + new_data + for key in updated_datasets.keys(): + npt.assert_array_equal(exp_dataset.query_points, updated_datasets[key].query_points) + npt.assert_array_equal(exp_dataset.observations, updated_datasets[key].observations) + + def test_trego_state_deepcopy() -> None: dataset = Dataset(tf.constant([[0.1, 0.2], [-0.1, -0.2]]), tf.constant([[0.4], [0.5]])) search_space = Box(tf.constant([1.2]), tf.constant([3.4])) @@ -1193,8 +1214,8 @@ def test_trust_region_box_get_dataset_min() -> None: trb._lower = tf.constant([0.2, 0.2], dtype=tf.float64) trb._upper = tf.constant([0.7, 0.7], dtype=tf.float64) x_min, y_min = trb.get_dataset_min(dataset) - npt.assert_array_equal(x_min, tf.constant([0.1, 0.1], dtype=tf.float64)) - npt.assert_array_equal(y_min, tf.constant([0.0], dtype=tf.float64)) + npt.assert_array_equal(x_min, tf.constant([0.3, 0.4], dtype=tf.float64)) + npt.assert_array_equal(y_min, tf.constant([0.2], dtype=tf.float64)) # Initialize sets the box to a random location, and sets the eps and y_min values. 
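# The revised expectations above reflect the region-restricted minimum; a sketch of the
# masking idea (mirroring the `get_dataset_min` rework later in this patch, with `region`
# and `dataset` as stand-ins):
#
#     in_region = region.contains(dataset.query_points)  # [N] boolean mask
#     masked_obs = tf.where(
#         in_region[:, None],
#         dataset.observations,
#         tf.constant(np.inf, dataset.observations.dtype),  # out-of-region -> inf
#     )
#     ix = tf.argmin(masked_obs)  # minimum taken over in-region points only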
@@ -1382,7 +1403,7 @@ def test_multi_trust_region_box_acquire_no_state() -> None: assert isinstance(state.acquisition_space, TaggedMultiSearchSpace) assert len(state.acquisition_space.subspace_tags) == 2 - for index, (tag, point) in enumerate(zip(state.acquisition_space.subspace_tags, points)): + for index, (tag, point) in enumerate(zip(state.acquisition_space.subspace_tags, points[0])): subspace = state.acquisition_space.get_subspace(tag) assert subspace == subspaces[index] assert isinstance(subspace, SingleObjectiveTrustRegionBox) @@ -1435,13 +1456,12 @@ def __init__( self, fixed_location: TensorType, global_search_space: SearchSpace, - index: Optional[int] = None, beta: float = 0.7, kappa: float = 1e-4, min_eps: float = 1e-2, init_eps: float = 0.07, ): - super().__init__(global_search_space, index, beta, kappa, min_eps) + super().__init__(global_search_space, beta, kappa, min_eps) self._location = fixed_location self._init_eps_val = init_eps @@ -1494,13 +1514,13 @@ def test_multi_trust_region_box_acquire_with_state() -> None: next_state, points = state_func(state) assert next_state is not None - assert len(points) == 3 + assert points.shape == [1, 3, 2] # The regions correspond to first, third and first points in the dataset. # First two regions should be updated. # The third region should be initialized and not updated, as it is too close to the first # subspace. for point, subspace, exp_obs, exp_eps in zip( - points, + points[0], subspaces, [dataset.observations[0], dataset.observations[2], dataset.observations[0]], [0.1, 0.1, 0.07], # First two regions updated, third region initialized. @@ -1516,7 +1536,7 @@ def test_multi_trust_region_box_acquire_with_state() -> None: @pytest.mark.parametrize("use_global_dataset", [True, False]) @pytest.mark.parametrize("num_regions", [2, 4]) @pytest.mark.parametrize("num_query_points_per_region", [1, 2]) -def test_batch_trust_region_box_with_multiple_models_and_regions( +def test_multi_trust_region_box_with_multiple_models_and_regions( use_global_model: bool, use_global_dataset: bool, num_regions: int, @@ -1526,8 +1546,7 @@ def test_batch_trust_region_box_with_multiple_models_and_regions( base_shift = tf.constant([2.0, 2.0], dtype=tf.float64) # Common base shift for all regions. eps = 0.9 subspaces = [ - TestTrustRegionBox(base_shift + i, search_space, i, init_eps=eps) - for i in range(num_regions) + TestTrustRegionBox(base_shift + i, search_space, init_eps=eps) for i in range(num_regions) ] # Define the models and acquisition functions for each region @@ -1644,7 +1663,7 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( search_space = Box([0.0], [3.0]) # Non-overlapping regions. subspaces = [ - TestTrustRegionBox(tf.constant([i], dtype=tf.float64), search_space, i, init_eps=0.4) + TestTrustRegionBox(tf.constant([i], dtype=tf.float64), search_space, init_eps=0.4) for i in range(num_local_models) ] models = copy_to_local_models(QuadraticMeanAndRBFKernel(), num_local_models) @@ -1675,12 +1694,8 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( for point in datasets[OBJECTIVE].query_points: assert any(subspace.contains(point) for subspace in subspaces) # Global dataset should be the concatenation of all local datasets. 
-    exp_query_points = tf.reshape(
-        tf.stack(
-            [datasets[f"OBJECTIVE__{i}"].query_points for i in range(num_local_models)],
-            axis=1,
-        ),
-        [-1, 1],
+    exp_query_points = tf.concat(
+        [datasets[f"OBJECTIVE__{i}"].query_points for i in range(num_local_models)], axis=0
     )
     npt.assert_array_almost_equal(datasets[OBJECTIVE].query_points, exp_query_points)
 

diff --git a/tests/unit/objectives/test_utils.py b/tests/unit/objectives/test_utils.py
index 8826a7166a..d4eb7ea67f 100644
--- a/tests/unit/objectives/test_utils.py
+++ b/tests/unit/objectives/test_utils.py
@@ -85,16 +85,12 @@ def test_mk_batch_observer(
         npt.assert_array_equal(ys.observations, tf.reshape(x_, [-1, 1]))
     else:
         assert isinstance(ys, dict)
-        if batch_size == 1:
-            exp_keys = {key}
-        else:
-            exp_keys = {f"{key}__{i}" for i in range(batch_size)}
-            exp_keys.add(key)
+        exp_keys = {f"{key}__{i}" for i in range(batch_size)}
+        exp_keys.add(str(key))
         assert ys.keys() == exp_keys
         npt.assert_array_equal(ys[key].query_points, tf.reshape(x_, [-1, 1]))
         npt.assert_array_equal(ys[key].observations, tf.reshape(x_, [-1, 1]))
-        if batch_size > 1:
-            for i in range(batch_size):
-                npt.assert_array_equal(ys[f"{key}__{i}"].query_points, x_[:, i])
-                npt.assert_array_equal(ys[f"{key}__{i}"].observations, x_[:, i])
+        for i in range(batch_size):
+            npt.assert_array_equal(ys[f"{key}__{i}"].query_points, x_[:, i])
+            npt.assert_array_equal(ys[f"{key}__{i}"].observations, x_[:, i])

diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py
index 57da8d603a..8c66ceacb0 100644
--- a/trieste/acquisition/rule.py
+++ b/trieste/acquisition/rule.py
@@ -73,7 +73,7 @@
     batchify_vectorize,
 )
 from .sampler import ExactThompsonSampler, ThompsonSampler
-from .utils import get_local_dataset, get_unique_points_mask, select_nth_output, stack_datasets
+from .utils import get_local_dataset, get_unique_points_mask, select_nth_output
 
 ResultType = TypeVar("ResultType", covariant=True)
 """ Unbound covariant type variable. """
@@ -970,6 +970,9 @@ def acquire(
 class UpdatableTrustRegion(SearchSpace):
     """A search space that can be updated."""
 
+    def __init__(self) -> None:
+        self.index: Optional[int] = None
+
     @abstractmethod
     def initialize(
         self,
@@ -1019,22 +1022,30 @@ def select_dataset(
         only required if the region is used with single model acquisition functions.
 
         :param datasets: The dataset for each tag.
-        :return: The dataset belonging to this region.
+        :return: The tag and associated dataset belonging to this region.
         """
         # By default return the OBJECTIVE dataset.
         return get_value_for_tag(datasets, OBJECTIVE)
 
-    def get_dataset_filter_mask(self, dataset: Dataset) -> tf.Tensor:
+    def get_dataset_filter_mask(
+        self, datasets: Optional[Mapping[Tag, Dataset]]
+    ) -> Tuple[Optional[Tag], Optional[tf.Tensor]]:
         """
-        Return a boolean mask that can be used to filter out points from the given dataset that
-        do not belong to this region.
+        Return a boolean mask that can be used to filter out points from the dataset that
+        do not belong to this region.
 
-        :param dataset: The dataset to filter.
-        :return: A boolean mask that can be used to filter the given dataset. A value of `True`
-            indicates that the corresponding point should be kept.
+        :param datasets: The dataset for each tag.
+        :return: The tag for the selected dataset and a boolean mask that can be used to filter
+            that dataset. A value of `True` indicates that the corresponding point should be kept.
         """
-        # By default return a mask that filters nothing.
- return tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) + # Always select the region dataset for filtering. Don't directly filter the global dataset. + assert self.index is not None, "the index should be set for filtering local datasets" + tag, dataset = get_value_for_tag(datasets, LocalTag(OBJECTIVE, self.index)) + if dataset is None: + return None, None + else: + # By default return a mask that filters nothing. + return tag, tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) UpdatableTrustRegionType = TypeVar("UpdatableTrustRegionType", bound=UpdatableTrustRegion) @@ -1088,6 +1099,8 @@ def __init__( if not isinstance(init_subspaces, Sequence): init_subspaces = [init_subspaces] self._init_subspaces = tuple(init_subspaces) + for index, subspace in enumerate(self._init_subspaces): + subspace.index = index self._tags = tuple([str(index) for index in range(len(init_subspaces))]) self._rule = rule @@ -1266,13 +1279,15 @@ def update_datasets( used_masks = { tag: tf.zeros(dataset.query_points.shape[:-1], dtype=tf.bool) for tag, dataset in datasets.items() + if LocalTag.from_tag(tag).is_local } # TODO: using init_subspaces here is a bit of a hack, but it works for now. assert self._init_subspaces is not None for subspace in self._init_subspaces: - tag, space_dataset = subspace.select_dataset(datasets) - assert space_dataset is not None - in_region = subspace.get_dataset_filter_mask(space_dataset) + tag, in_region = subspace.get_dataset_filter_mask(datasets) + assert tag is not None + ltag = LocalTag.from_tag(tag) + assert ltag.is_local, f"can only filter local tags, got {tag}" used_masks[tag] = tf.logical_or(used_masks[tag], in_region) filtered_datasets = {} @@ -1284,19 +1299,23 @@ def update_datasets( ) ltag = LocalTag.from_tag(tag) - if ltag.is_local and ltag.global_tag not in global_tags: + if ltag.global_tag not in global_tags: global_tags.append(ltag.global_tag) # Include global datasets. for gtag in global_tags: - # Create global dataset from local datasets. This is done by stacking the local + # Create global dataset from local datasets. This is done by concatenating the local # datasets. local_datasets = [ value for tag, value in filtered_datasets.items() - if (ltag := LocalTag.from_tag(tag)).is_local and ltag.global_tag == gtag + if LocalTag.from_tag(tag).global_tag == gtag ] - filtered_datasets[gtag] = stack_datasets(local_datasets) + # Note there is no ordering assumption for the local datasets. They are simply + # concatenated and information about which local dataset they came from is lost. + qps = tf.concat([dataset.query_points for dataset in local_datasets], axis=0) + obs = tf.concat([dataset.observations for dataset in local_datasets], axis=0) + filtered_datasets[gtag] = Dataset(qps, obs) return filtered_datasets @@ -1307,7 +1326,6 @@ class SingleObjectiveTrustRegionBox(Box, UpdatableTrustRegion): def __init__( self, global_search_space: SearchSpace, - index: Optional[int] = None, beta: float = 0.7, kappa: float = 1e-4, min_eps: float = 1e-2, @@ -1324,12 +1342,12 @@ def __init__( """ self._global_search_space = global_search_space - self._index = index self._beta = beta self._kappa = kappa self._min_eps = min_eps super().__init__(global_search_space.lower, global_search_space.upper) + super(Box, self).__init__() @property def global_search_space(self) -> SearchSpace: @@ -1402,10 +1420,10 @@ def select_model( ) -> Tuple[Optional[Tag], Optional[ProbabilisticModelType]]: # Select the model belonging to this box. 
Note there isn't necessarily a one-to-one # mapping between regions and models. - if self._index is None: + if self.index is None: tags = [OBJECTIVE] # If no index, then pick the global model. else: - tags = [LocalTag(OBJECTIVE, self._index), OBJECTIVE] # Prefer local model if available. + tags = [LocalTag(OBJECTIVE, self.index), OBJECTIVE] # Prefer local model if available. return get_value_for_tag(models, tags) def select_dataset( @@ -1413,18 +1431,26 @@ def select_dataset( ) -> Tuple[Optional[Tag], Optional[Dataset]]: # Select the dataset belonging to this box. Note there isn't necessarily a one-to-one # mapping between regions and datasets. - if self._index is None: + if self.index is None: tags = [OBJECTIVE] # If no index, then pick the global dataset. else: tags = [ - LocalTag(OBJECTIVE, self._index), + LocalTag(OBJECTIVE, self.index), OBJECTIVE, ] # Prefer local dataset if available. return get_value_for_tag(datasets, tags) - def get_dataset_filter_mask(self, dataset: Dataset) -> tf.Tensor: - # Only keep points that are in the box. - return self.contains(dataset.query_points) + def get_dataset_filter_mask( + self, datasets: Optional[Mapping[Tag, Dataset]] + ) -> Tuple[Optional[Tag], Optional[tf.Tensor]]: + # Always select the region dataset for filtering. Don't directly filter the global dataset. + assert self.index is not None, "the index should be set for filtering local datasets" + tag, dataset = get_value_for_tag(datasets, LocalTag(OBJECTIVE, self.index)) + if dataset is None: + return None, None + else: + # Only keep points that are in the box. + return tag, self.contains(dataset.query_points) @check_shapes( "return[0]: [D]", @@ -1435,11 +1461,15 @@ def get_dataset_min(self, dataset: Optional[Dataset]) -> Tuple[TensorType, Tenso if dataset is None: raise ValueError("""dataset must be provided""") - # Note the behaviour here depends on the dataset passed in, which could be the global - # dataset or the local dataset. - ix = tf.argmin(dataset.observations) + in_tr = self.contains(dataset.query_points) + in_tr_obs = tf.where( + tf.expand_dims(in_tr, axis=-1), + dataset.observations, + tf.constant(np.inf, dtype=dataset.observations.dtype), + ) + ix = tf.argmin(in_tr_obs) x_min = tf.gather(dataset.query_points, ix) - y_min = tf.gather(dataset.observations, ix) + y_min = tf.gather(in_tr_obs, ix) return tf.squeeze(x_min, axis=0), tf.squeeze(y_min) @@ -1468,8 +1498,10 @@ def acquire( num_query_points = 1 self._init_subspaces = tuple( - [SingleObjectiveTrustRegionBox(search_space, i) for i in range(num_query_points)] + [SingleObjectiveTrustRegionBox(search_space) for _ in range(num_query_points)] ) + for index, subspace in enumerate(self._init_subspaces): + subspace.index = index self._tags = tuple([str(index) for index in range(len(self._init_subspaces))]) # Ensure passed in global search space is always the same as the search space passed to @@ -1520,12 +1552,11 @@ class TREGOBox(SingleObjectiveTrustRegionBox): def __init__( self, global_search_space: SearchSpace, - index: Optional[int] = None, beta: float = 0.7, kappa: float = 1e-4, min_eps: float = 1e-2, ): - super().__init__(global_search_space, index, beta, kappa, min_eps) + super().__init__(global_search_space, beta, kappa, min_eps) self._is_global = False self._initialized = False @@ -1567,9 +1598,24 @@ def initialize( super().initialize(models, datasets) - def get_dataset_filter_mask(self, dataset: Dataset) -> tf.Tensor: - # Don't filter out any points from the dataset. 
- return tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) + def get_dataset_filter_mask( + self, datasets: Optional[Mapping[Tag, Dataset]] + ) -> Tuple[Optional[Tag], Optional[tf.Tensor]]: + # Don't filter out any points from the dataset by bypassing the + # SingleObjectiveTrustRegionBox method. + return super(SingleObjectiveTrustRegionBox, self).get_dataset_filter_mask(datasets) + + @inherit_check_shapes + def get_dataset_min(self, dataset: Optional[Dataset]) -> Tuple[TensorType, TensorType]: + if dataset is None: + raise ValueError("""dataset must be provided""") + + # Always return the global minimum. + ix = tf.argmin(dataset.observations) + x_min = tf.gather(dataset.query_points, ix) + y_min = tf.gather(dataset.observations, ix) + + return tf.squeeze(x_min, axis=0), tf.squeeze(y_min) class TURBO( diff --git a/trieste/acquisition/utils.py b/trieste/acquisition/utils.py index fbe1ee7af4..602afebc77 100644 --- a/trieste/acquisition/utils.py +++ b/trieste/acquisition/utils.py @@ -13,7 +13,7 @@ # limitations under the License. import copy import functools -from typing import Mapping, Sequence, Tuple, Union +from typing import Mapping, Tuple, Union import tensorflow as tf from check_shapes import check_shapes @@ -157,25 +157,6 @@ def copy_to_local_models( return {f"{key}__{i}": copy.deepcopy(global_model) for i in range(num_local_models)} -def stack_datasets(datasets: Sequence[Dataset]) -> Dataset: - """ - Stack a sequence of datasets along a new second batch axis. - - :param datasets: A sequence of datasets. - :return: A dataset whose query points and observations are the stack of the query points - and observations in ``datasets`` along the second axis. - :raise ValueError: If ``datasets`` is empty. - :raise InvalidArgumentError: If the shapes of the query points in ``datasets`` differ in any - but the first dimension. The same applies for observations. - """ - if not datasets: - raise ValueError("datasets must be non-empty") - - qps = tf.stack([dataset.query_points for dataset in datasets], axis=1) - obs = tf.stack([dataset.observations for dataset in datasets], axis=1) - return Dataset(tf.reshape(qps, [-1, qps.shape[-1]]), tf.reshape(obs, [-1, obs.shape[-1]])) - - @check_shapes( "points: [n_points, ...]", "return: [n_points]", diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py index 198618d5eb..c69870063e 100644 --- a/trieste/ask_tell_optimization.py +++ b/trieste/ask_tell_optimization.py @@ -446,10 +446,9 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None: with Timer() as model_fitting_timer: for tag, model in self._models.items(): - # Prefer local dataset if available. - tags = [tag, LocalTag.from_tag(tag).global_tag] - _, dataset = get_value_for_tag(self._datasets, tags) - assert dataset is not None + # Always use the matching dataset to the model. If the model is + # local, then the dataset should be too by this stage. + dataset = self._datasets[tag] model.update(dataset) model.optimize_and_save_result(dataset) diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index 212612f696..c744a66b3a 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -778,10 +778,9 @@ def optimize( with Timer() as model_fitting_timer: if fit_model: for tag, model in models.items(): - # Prefer local dataset if available. - tags = [tag, LocalTag.from_tag(tag).global_tag] - _, dataset = get_value_for_tag(datasets, tags) - assert dataset is not None + # Always use the matching dataset to the model. 
If the model is + # local, then the dataset should be too by this stage. + dataset = datasets[tag] model.update(dataset) model.optimize_and_save_result(dataset) @@ -959,7 +958,7 @@ def write_summary_observations( observation_plot_dfs: MutableMapping[Tag, pd.DataFrame], ) -> None: """Write TensorBoard summary for the current step observations.""" - for tag in datasets: + for tag in models: with tf.name_scope(f"{tag}.model"): models[tag].log(datasets[tag]) diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index 1d32fe61d9..46ba822353 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -102,18 +102,14 @@ def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: # Always use rank 2 shape as models (e.g. GPR) expect this, so return as is. return obs_or_dataset else: - if batch_size == 1: - # If batch size is 1, just return the dataset as is, i.e. use the global dataset. - return {key: obs_or_dataset} - else: - # Include overall dataset and per batch dataset. - obs = obs_or_dataset.observations - qps = tf.reshape(qps, [-1, batch_size, qps.shape[-1]]) - obs = tf.reshape(obs, [-1, batch_size, obs.shape[-1]]) - datasets: Mapping[Tag, Dataset] = { - key: obs_or_dataset, - **{LocalTag(key, i): Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)}, - } - return datasets + # Include overall dataset and per batch dataset. + obs = obs_or_dataset.observations + qps = tf.reshape(qps, [-1, batch_size, qps.shape[-1]]) + obs = tf.reshape(obs, [-1, batch_size, obs.shape[-1]]) + datasets: Mapping[Tag, Dataset] = { + key: obs_or_dataset, + **{LocalTag(key, i): Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)}, + } + return datasets return _observer From 9a7dd2391aeaca4db8473e3654e8b5991ea5dc49 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Tue, 10 Oct 2023 15:14:05 +0100 Subject: [PATCH 09/33] Fix mypy model type issues --- tests/integration/test_bayesian_optimization.py | 13 ++++++------- tests/unit/acquisition/test_rule.py | 6 +++--- trieste/acquisition/utils.py | 9 +++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_bayesian_optimization.py b/tests/integration/test_bayesian_optimization.py index c9b02591f9..25b544d774 100644 --- a/tests/integration/test_bayesian_optimization.py +++ b/tests/integration/test_bayesian_optimization.py @@ -26,6 +26,7 @@ from _pytest.mark import ParameterSet from tests.util.misc import random_seed +from trieste.data import Dataset from trieste.acquisition import ( GIBBON, AcquisitionFunctionClass, @@ -86,7 +87,7 @@ from trieste.objectives.utils import mk_observer from trieste.observer import OBJECTIVE from trieste.space import Box, SearchSpace -from trieste.types import State, TensorType +from trieste.types import State, Tag, TensorType try: import pymoo @@ -687,18 +688,16 @@ def _test_optimizer_finds_minimum( else: raise ValueError(f"Unsupported model_type '{model_type}'") - models = cast(TrainableProbabilisticModelType, model) - - if num_models > 1: - initial_data = {OBJECTIVE: initial_data} - models = copy_to_local_models(models, num_models) + model = cast(TrainableProbabilisticModelType, model) + models = copy_to_local_models(model, num_models) if num_models > 1 else {OBJECTIVE: model} + dataset: Mapping[Tag, Dataset] = {OBJECTIVE: initial_data} with tempfile.TemporaryDirectory() as tmpdirname: summary_writer = tf.summary.create_file_writer(tmpdirname) with tensorboard_writer(summary_writer): result = BayesianOptimizer(observer, search_space).optimize( num_steps or 2, - 
initial_data, + dataset, models, acquisition_rule, track_state=True, diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index ebe16855c6..e49d3ca4b0 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -1681,10 +1681,10 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( # Check local datasets. for i, subspace in enumerate(subspaces): assert ( - datasets[f"OBJECTIVE__{i}"].query_points.shape[0] + datasets[LocalTag(OBJECTIVE, i)].query_points.shape[0] == exp_num_init_points + num_query_points_per_region ) - assert np.all(subspace.contains(datasets[f"OBJECTIVE__{i}"].query_points)) + assert np.all(subspace.contains(datasets[LocalTag(OBJECTIVE, i)].query_points)) # Check global dataset. assert datasets[OBJECTIVE].query_points.shape[0] == num_local_models * ( @@ -1695,7 +1695,7 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( assert any(subspace.contains(point) for subspace in subspaces) # Global dataset should be the concatenation of all local datasets. exp_query_points = tf.concat( - [datasets[f"OBJECTIVE__{i}"].query_points for i in range(num_local_models)], axis=0 + [datasets[LocalTag(OBJECTIVE, i)].query_points for i in range(num_local_models)], axis=0 ) npt.assert_array_almost_equal(datasets[OBJECTIVE].query_points, exp_query_points) diff --git a/trieste/acquisition/utils.py b/trieste/acquisition/utils.py index 602afebc77..dd28a1fa1c 100644 --- a/trieste/acquisition/utils.py +++ b/trieste/acquisition/utils.py @@ -19,10 +19,11 @@ from check_shapes import check_shapes from ..data import Dataset -from ..models.interfaces import ProbabilisticModel +from ..models import ProbabilisticModelType from ..observer import OBJECTIVE from ..space import SearchSpaceType from ..types import Tag, TensorType +from ..utils.misc import LocalTag from .interface import AcquisitionFunction from .optimizer import AcquisitionOptimizer @@ -142,10 +143,10 @@ def get_local_dataset(local_space: SearchSpaceType, dataset: Dataset) -> Dataset def copy_to_local_models( - global_model: ProbabilisticModel, + global_model: ProbabilisticModelType, num_local_models: int, key: Tag = OBJECTIVE, -) -> Mapping[Tag, ProbabilisticModel]: +) -> Mapping[Tag, ProbabilisticModelType]: """ Helper method to copy a global model to local models. @@ -154,7 +155,7 @@ def copy_to_local_models( :param key: The tag prefix for the local models. :return: A mapping of the local models. 
""" - return {f"{key}__{i}": copy.deepcopy(global_model) for i in range(num_local_models)} + return {LocalTag(key, i).tag: copy.deepcopy(global_model) for i in range(num_local_models)} @check_shapes( From 4622a98785acd79bd640f2e7e9a50e7ed79f57ff Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Tue, 10 Oct 2023 18:48:56 +0100 Subject: [PATCH 10/33] Add ask_tell testing --- .../integration/test_ask_tell_optimization.py | 68 +++++++++++++--- .../integration/test_bayesian_optimization.py | 5 +- tests/unit/acquisition/test_rule.py | 14 ++++ tests/unit/objectives/test_utils.py | 9 ++- tests/unit/test_ask_tell_optimization.py | 80 ++++++++++++++++++- tests/unit/test_bayesian_optimizer.py | 2 +- tests/unit/utils/test_misc.py | 39 ++++++++- trieste/ask_tell_optimization.py | 6 +- trieste/objectives/utils.py | 1 - 9 files changed, 201 insertions(+), 23 deletions(-) diff --git a/tests/integration/test_ask_tell_optimization.py b/tests/integration/test_ask_tell_optimization.py index e471bd695d..185dcb2097 100644 --- a/tests/integration/test_ask_tell_optimization.py +++ b/tests/integration/test_ask_tell_optimization.py @@ -23,22 +23,24 @@ import tensorflow as tf from tests.util.misc import random_seed -from trieste.acquisition import LocalPenalization +from trieste.acquisition import LocalPenalization, ParallelContinuousThompsonSampling from trieste.acquisition.rule import ( AcquisitionRule, AsynchronousGreedy, AsynchronousRuleState, BatchTrustRegionBox, EfficientGlobalOptimization, + SingleObjectiveTrustRegionBox, TREGOBox, ) +from trieste.acquisition.utils import copy_to_local_models from trieste.ask_tell_optimization import AskTellOptimizer from trieste.bayesian_optimizer import OptimizationResult, Record from trieste.logging import set_step_number, tensorboard_writer from trieste.models import TrainableProbabilisticModel from trieste.models.gpflow import GaussianProcessRegression, build_gpr from trieste.objectives import ScaledBranin, SimpleQuadratic -from trieste.objectives.utils import mk_observer +from trieste.objectives.utils import mk_batch_observer, mk_observer from trieste.observer import OBJECTIVE from trieste.space import Box, SearchSpace from trieste.types import State, TensorType @@ -47,26 +49,58 @@ # We use a copy of these for a quicker test against a simple quadratic function # (copying is necessary as some of the acquisition rules are stateful). 
OPTIMIZER_PARAMS = ( - "num_steps, reload_state, acquisition_rule_fn", + "num_steps, reload_state, acquisition_rule_fn, num_models", [ pytest.param( - 20, False, lambda: EfficientGlobalOptimization(), id="EfficientGlobalOptimization" + 20, False, lambda: EfficientGlobalOptimization(), 1, id="EfficientGlobalOptimization" ), pytest.param( 20, True, lambda: EfficientGlobalOptimization(), + 1, id="EfficientGlobalOptimization/reload_state", ), pytest.param( - 15, False, lambda: BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)), id="TREGO" + 15, + False, + lambda: BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)), + 1, + id="TREGO", ), pytest.param( 16, True, lambda: BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)), + 1, id="TREGO/reload_state", ), + pytest.param( + 10, + False, + lambda: BatchTrustRegionBox( + [SingleObjectiveTrustRegionBox(ScaledBranin.search_space) for _ in range(3)], + EfficientGlobalOptimization( + ParallelContinuousThompsonSampling(), + num_query_points=3, + ), + ), + 1, + id="BatchTrustRegionBox", + ), + pytest.param( + 10, + False, + lambda: BatchTrustRegionBox( + [SingleObjectiveTrustRegionBox(ScaledBranin.search_space) for _ in range(3)], + EfficientGlobalOptimization( + ParallelContinuousThompsonSampling(), + num_query_points=2, + ), + ), + 3, + id="BatchTrustRegionBox/LocalModels", + ), pytest.param( 10, False, @@ -76,6 +110,7 @@ ).using(OBJECTIVE), num_query_points=3, ), + 1, id="LocalPenalization", ), pytest.param( @@ -86,6 +121,7 @@ ScaledBranin.search_space, ).using(OBJECTIVE), ), + 1, id="LocalPenalization/AsynchronousGreedy", ), ], @@ -109,8 +145,11 @@ def test_ask_tell_optimizer_finds_minima_of_the_scaled_branin_function( TrainableProbabilisticModel, ], ], + num_models: int, ) -> None: - _test_ask_tell_optimization_finds_minima(True, num_steps, reload_state, acquisition_rule_fn) + _test_ask_tell_optimization_finds_minima( + True, num_steps, reload_state, acquisition_rule_fn, num_models + ) @random_seed @@ -129,11 +168,12 @@ def test_ask_tell_optimizer_finds_minima_of_simple_quadratic( TrainableProbabilisticModel, ], ], + num_models: int, ) -> None: # for speed reasons we sometimes test with a simple quadratic defined on the same search space # branin; currently assume that every rule should be able to solve this in 5 steps _test_ask_tell_optimization_finds_minima( - False, min(num_steps, 5), reload_state, acquisition_rule_fn + False, min(num_steps, 5), reload_state, acquisition_rule_fn, num_models ) @@ -152,6 +192,7 @@ def _test_ask_tell_optimization_finds_minima( TrainableProbabilisticModel, ], ], + num_models: int, ) -> None: # For the case when optimization state is saved and reload on each iteration # we need to use new acquisition function object to imitate real life usage @@ -160,17 +201,22 @@ def _test_ask_tell_optimization_finds_minima( search_space = ScaledBranin.search_space initial_query_points = search_space.sample(5) observer = mk_observer(ScaledBranin.objective if optimize_branin else SimpleQuadratic.objective) + batch_observer = mk_batch_observer(observer, OBJECTIVE) initial_data = observer(initial_query_points) model = GaussianProcessRegression( build_gpr(initial_data, search_space, likelihood_variance=1e-7) ) + models = copy_to_local_models(model, num_models) if num_models > 1 else {OBJECTIVE: model} + initial_dataset = {OBJECTIVE: initial_data} with tempfile.TemporaryDirectory() as tmpdirname: summary_writer = tf.summary.create_file_writer(tmpdirname) with tensorboard_writer(summary_writer): set_step_number(0) - ask_tell 
= AskTellOptimizer(search_space, initial_data, model, acquisition_rule_fn()) + ask_tell = AskTellOptimizer( + search_space, initial_dataset, models, acquisition_rule_fn() + ) for i in range(1, num_steps + 1): # two scenarios are tested here, depending on `reload_state` parameter @@ -185,7 +231,11 @@ def _test_ask_tell_optimization_finds_minima( ] = ask_tell.to_record() written_state = pickle.dumps(state) - new_data_point = observer(new_point) + # If query points are rank 3, then use a batched observer. + if tf.rank(new_point) == 3: + new_data_point = batch_observer(new_point) + else: + new_data_point = observer(new_point) if reload_state: state = pickle.loads(written_state) diff --git a/tests/integration/test_bayesian_optimization.py b/tests/integration/test_bayesian_optimization.py index 25b544d774..645c1049ec 100644 --- a/tests/integration/test_bayesian_optimization.py +++ b/tests/integration/test_bayesian_optimization.py @@ -26,7 +26,6 @@ from _pytest.mark import ParameterSet from tests.util.misc import random_seed -from trieste.data import Dataset from trieste.acquisition import ( GIBBON, AcquisitionFunctionClass, @@ -87,7 +86,7 @@ from trieste.objectives.utils import mk_observer from trieste.observer import OBJECTIVE from trieste.space import Box, SearchSpace -from trieste.types import State, Tag, TensorType +from trieste.types import State, TensorType try: import pymoo @@ -690,7 +689,7 @@ def _test_optimizer_finds_minimum( model = cast(TrainableProbabilisticModelType, model) models = copy_to_local_models(model, num_models) if num_models > 1 else {OBJECTIVE: model} - dataset: Mapping[Tag, Dataset] = {OBJECTIVE: initial_data} + dataset = {OBJECTIVE: initial_data} with tempfile.TemporaryDirectory() as tmpdirname: summary_writer = tf.summary.create_file_writer(tmpdirname) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index e49d3ca4b0..d75b30ac11 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -1218,6 +1218,20 @@ def test_trust_region_box_get_dataset_min() -> None: npt.assert_array_equal(y_min, tf.constant([0.2], dtype=tf.float64)) +# get_dataset_min returns first x value and inf y value when points in dataset are outside the +# search space. +def test_trust_region_box_get_dataset_min_outside_search_space() -> None: + search_space = Box([0.0, 0.0], [1.0, 1.0]) + dataset = Dataset( + tf.constant([[1.2, 1.3], [-0.4, -0.5]], dtype=tf.float64), + tf.constant([[0.7], [0.9]], dtype=tf.float64), + ) + trb = SingleObjectiveTrustRegionBox(search_space) + x_min, y_min = trb.get_dataset_min(dataset) + npt.assert_array_equal(x_min, tf.constant([1.2, 1.3], dtype=tf.float64)) + npt.assert_array_equal(y_min, tf.constant([np.inf], dtype=tf.float64)) + + # Initialize sets the box to a random location, and sets the eps and y_min values. 
def test_trust_region_box_initialize() -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) diff --git a/tests/unit/objectives/test_utils.py b/tests/unit/objectives/test_utils.py index d4eb7ea67f..98e6538ae6 100644 --- a/tests/unit/objectives/test_utils.py +++ b/tests/unit/objectives/test_utils.py @@ -21,6 +21,7 @@ from trieste.objectives.utils import mk_batch_observer, mk_multi_observer, mk_observer from trieste.observer import SingleObserver from trieste.types import Tag, TensorType +from trieste.utils.misc import LocalTag def test_mk_observer() -> None: @@ -85,12 +86,12 @@ def test_mk_batch_observer( npt.assert_array_equal(ys.observations, tf.reshape(x_, [-1, 1])) else: assert isinstance(ys, dict) - exp_keys = {f"{key}__{i}" for i in range(batch_size)} - exp_keys.add(str(key)) + exp_keys = {LocalTag(key, i).tag for i in range(batch_size)} + exp_keys.add(key) assert ys.keys() == exp_keys npt.assert_array_equal(ys[key].query_points, tf.reshape(x_, [-1, 1])) npt.assert_array_equal(ys[key].observations, tf.reshape(x_, [-1, 1])) for i in range(batch_size): - npt.assert_array_equal(ys[f"{key}__{i}"].query_points, x_[:, i]) - npt.assert_array_equal(ys[f"{key}__{i}"].observations, x_[:, i]) + npt.assert_array_equal(ys[LocalTag(key, i)].query_points, x_[:, i]) + npt.assert_array_equal(ys[LocalTag(key, i)].observations, x_[:, i]) diff --git a/tests/unit/test_ask_tell_optimization.py b/tests/unit/test_ask_tell_optimization.py index 7c638f2baf..67c26efaed 100644 --- a/tests/unit/test_ask_tell_optimization.py +++ b/tests/unit/test_ask_tell_optimization.py @@ -15,19 +15,28 @@ from typing import Mapping, Optional +import numpy.testing as npt import pytest import tensorflow as tf from tests.util.misc import FixedAcquisitionRule, assert_datasets_allclose, mk_dataset -from tests.util.models.gpflow.models import GaussianProcess, PseudoTrainableProbModel, rbf +from tests.util.models.gpflow.models import ( + GaussianProcess, + PseudoTrainableProbModel, + QuadraticMeanAndRBFKernel, + rbf, +) from trieste.acquisition.rule import AcquisitionRule +from trieste.acquisition.utils import copy_to_local_models from trieste.ask_tell_optimization import AskTellOptimizer from trieste.bayesian_optimizer import OptimizationResult, Record from trieste.data import Dataset from trieste.models.interfaces import ProbabilisticModel, TrainableProbabilisticModel +from trieste.objectives.utils import mk_batch_observer from trieste.observer import OBJECTIVE from trieste.space import Box from trieste.types import State, Tag, TensorType +from trieste.utils.misc import LocalTag # tags TAG1: Tag = "1" @@ -427,3 +436,72 @@ def __deepcopy__(self, memo: dict[int, object]) -> _UncopyableModel: with pytest.raises(NotImplementedError): ask_tell.to_result() assert ask_tell.to_result(copy=False).final_result.is_ok + + +@pytest.mark.parametrize("use_global_model", [True, False]) +@pytest.mark.parametrize("use_global_init_dataset", [True, False]) +@pytest.mark.parametrize("num_query_points_per_batch", [1, 2]) +def test_ask_tell_optimizer_creates_correct_datasets_for_rank3_points( + use_global_model: bool, use_global_init_dataset: bool, num_query_points_per_batch: int +) -> None: + batch_size = 4 + if use_global_init_dataset: + init_data = {OBJECTIVE: mk_dataset([[0.5], [1.5]], [[0.25], [0.35]])} + else: + init_data = { + LocalTag(OBJECTIVE, i): mk_dataset([[0.5 + i], [1.5 + i]], [[0.25], [0.35]]) + for i in range(batch_size) + } + init_data[OBJECTIVE] = mk_dataset([[0.5], [1.5]], [[0.25], [0.35]]) + + query_points = tf.reshape( + 
tf.constant(range(batch_size * num_query_points_per_batch), tf.float64), + (num_query_points_per_batch, batch_size, 1), + ) + + class DatasetChecker(QuadraticMeanAndRBFKernel, PseudoTrainableProbModel): + def __init__(self) -> None: + super().__init__() + self.update_count = 0 + self._tag = OBJECTIVE + + def update(self, dataset: Dataset) -> None: + if use_global_model: + exp_init_qps = init_data[OBJECTIVE].query_points + else: + if use_global_init_dataset: + exp_init_qps = init_data[OBJECTIVE].query_points + else: + exp_init_qps = init_data[self._tag].query_points + + if self.update_count == 0: + # Initial model training. + exp_qps = exp_init_qps + else: + # Subsequent model training. + if use_global_model: + exp_qps = tf.concat([exp_init_qps, tf.reshape(query_points, [-1, 1])], 0) + else: + index = LocalTag.from_tag(self._tag).local_index + exp_qps = tf.concat([exp_init_qps, query_points[:, index]], 0) + + npt.assert_array_equal(exp_qps, dataset.query_points) + self.update_count += 1 + + search_space = Box([-1], [1]) + + model = DatasetChecker() + if use_global_model: + models = {OBJECTIVE: model} + else: + models = copy_to_local_models(model, batch_size) # type: ignore[assignment] + for tag, model in models.items(): + model._tag = tag + + observer = mk_batch_observer(lambda x: Dataset(x, x), OBJECTIVE) + rule = FixedAcquisitionRule(query_points) + ask_tell = AskTellOptimizer(search_space, init_data, models, rule) + + points = ask_tell.ask() + new_data = observer(points) + ask_tell.tell(new_data) diff --git a/tests/unit/test_bayesian_optimizer.py b/tests/unit/test_bayesian_optimizer.py index 5a4e7250c4..d41b58c1fe 100644 --- a/tests/unit/test_bayesian_optimizer.py +++ b/tests/unit/test_bayesian_optimizer.py @@ -259,7 +259,7 @@ def test_bayesian_optimizer_creates_correct_datasets_for_rank3_points( (num_query_points_per_batch, batch_size, 1), ) - class DatasetChecker(_PseudoTrainableQuadratic): + class DatasetChecker(QuadraticMeanAndRBFKernel, PseudoTrainableProbModel): def __init__(self) -> None: super().__init__() self.update_count = 0 diff --git a/tests/unit/utils/test_misc.py b/tests/unit/utils/test_misc.py index aa7d581dd6..1e0eaf1a75 100644 --- a/tests/unit/utils/test_misc.py +++ b/tests/unit/utils/test_misc.py @@ -14,7 +14,7 @@ from __future__ import annotations from time import sleep -from typing import Any +from typing import Any, Optional, Union import numpy as np import numpy.testing as npt @@ -23,9 +23,10 @@ from tests.util.misc import TF_DEBUGGING_ERROR_TYPES, ShapeLike, various_shapes from trieste.observer import OBJECTIVE -from trieste.types import TensorType +from trieste.types import Tag, TensorType from trieste.utils.misc import ( Err, + LocalTag, Ok, Timer, flatten_leading_dims, @@ -101,7 +102,7 @@ def test_get_value_for_tag_returns_none_if_mapping_is_none() -> None: def test_get_value_for_tag_raises_if_tag_not_in_mapping() -> None: - with pytest.raises(ValueError, match="none of the tags '\['baz'\]' found in mapping"): + with pytest.raises(ValueError, match="none of the tags '.'baz'.' 
found in mapping"): get_value_for_tag({"foo": "bar"}, "baz") @@ -119,6 +120,38 @@ def test_get_value_for_tag_returns_first_matching_tag() -> None: ) == ("qux", "quux") +@pytest.mark.parametrize("tag_name", ["test_tag_1", "test_tag_2"]) +@pytest.mark.parametrize("tag_index", [0, 2, None]) +def test_local_tag_creation(tag_name: str, tag_index: Optional[int]) -> None: + tag = LocalTag(tag_name, tag_index) + is_local = True if tag_index is not None else False + exp_tag = f"{tag_name}__{tag_index}" if is_local else tag_name + + assert tag.is_local == is_local + assert tag.global_tag == tag_name + assert tag.local_index == tag_index + assert tag == exp_tag + assert tag.tag == exp_tag + assert str(tag) == exp_tag + assert repr(tag) == f"LocalTag({tag_name}, {tag_index})" + assert hash(tag) == hash(exp_tag) + + +@pytest.mark.parametrize( + "tag, exp_tag", + [ + ("test_tag_1", LocalTag("test_tag_1", None)), + ("test_tag__2", LocalTag("test_tag", 2)), + (LocalTag("test_tag_1", 3), LocalTag("test_tag_1", 3)), + (LocalTag("test_tag", None), LocalTag("test_tag", None)), + ], +) +def test_local_tag_from_tag(tag: Union[Tag, LocalTag], exp_tag: LocalTag) -> None: + ltag = LocalTag.from_tag(tag) + assert ltag.global_tag == exp_tag.global_tag + assert ltag.local_index == exp_tag.local_index + + def test_Timer() -> None: sleep_time = 0.1 with Timer() as timer: diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py index c69870063e..c0ba3487ce 100644 --- a/trieste/ask_tell_optimization.py +++ b/trieste/ask_tell_optimization.py @@ -436,7 +436,11 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None: if isinstance(new_data, Dataset): new_data = {OBJECTIVE: new_data} - if self._datasets.keys() != new_data.keys(): + # The datasets must have the same keys as the existing datasets. Only exception is if + # the existing datasets are all global, in which case the dataset will be appropriately + # updated below for the next iteration. + datasets_indices = {LocalTag.from_tag(tag).local_index for tag in self._datasets.keys()} + if self._datasets.keys() != new_data.keys() and datasets_indices != {None}: raise ValueError( f"new_data keys {new_data.keys()} doesn't " f"match dataset keys {self._datasets.keys()}" diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index 46ba822353..0984139381 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -77,7 +77,6 @@ def mk_batch_observer( :return: A multi-observer across the batch dimension of query points, returning the data from ``objective``. If ``key`` is provided, the observer will be a mapping. Otherwise, it will return a single dataset. - :raise ValueError (or tf.errors.InvalidArgumentError): If the query points are not rank 3. :raise ValueError (or tf.errors.InvalidArgumentError): If ``objective_or_observer`` is a multi-observer. 
""" From af44e488c039b58ba9e4989fd0a5a0882185a4ff Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Tue, 10 Oct 2023 19:38:22 +0100 Subject: [PATCH 11/33] Fix summary init when only global dataset --- trieste/bayesian_optimizer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index c744a66b3a..82a9a317c2 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -911,7 +911,11 @@ def write_summary_initial_model_fit( """Write TensorBoard summary for the model fitting to the initial data.""" for tag, model in models.items(): with tf.name_scope(f"{tag}.model"): - model.log(datasets[tag]) + # Prefer local dataset if available. + tags = [tag, LocalTag.from_tag(tag).global_tag] + _, dataset = get_value_for_tag(datasets, tags) + assert dataset is not None + model.log(dataset) logging.scalar( "wallclock/model_fitting", model_fitting_timer.time, From ef9e455730e050937229635ae3eb6be40bd90a07 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Wed, 11 Oct 2023 10:14:34 +0100 Subject: [PATCH 12/33] Remove walrus operator --- trieste/acquisition/rule.py | 27 ++++++++++++--------------- trieste/utils/misc.py | 8 +++++--- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 8c66ceacb0..3218fe50bd 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -1140,28 +1140,25 @@ def acquire( assert self._init_subspaces is not None num_subspaces = len(self._tags) - self.num_local_models = len( - [ - tag - for tag in models - if (ltag := LocalTag.from_tag(tag)).is_local and ltag.global_tag == OBJECTIVE - ] - ) - self.num_local_models = max(self.num_local_models, 1) - assert num_subspaces % self.num_local_models == 0, ( - f"The number of subspaces {num_subspaces} should be a multiple of the number of " - f"local objective models {self.num_local_models}" + num_local_models = 0 + for tag in models: + ltag = LocalTag.from_tag(tag) + if ltag.is_local and ltag.global_tag == OBJECTIVE: + num_local_models += 1 + assert num_local_models in [0, num_subspaces], ( + f"When using local models, the number of subspaces {num_subspaces} should be equal to " + f"the number of local objective models {num_local_models}" ) - # If the base rule is a single model acquisition rule, but we have (multiple) local + # If the base rule is a single model acquisition rule, but we have local # models, run the (deepcopied) base rule sequentially for each subspace. # Otherwise, run the base rule as is, once with all models and datasets. - # Note: this should only trigger on the first call to `acquire`, as after that `self._rule` - # will be a list of rules. + # Note: this should only trigger on the first call to `acquire`, as after that we will + # have a list of rules in `self._rules`. 
if (
            isinstance(self._rule, EfficientGlobalOptimization)
            and hasattr(self._rule._builder, "single_builder")
-            and (self.num_local_models > 1 or OBJECTIVE not in models)
+            and (num_local_models > 0 or OBJECTIVE not in models)
        ):
            self._rules = [copy.deepcopy(self._rule) for _ in range(num_subspaces)]

diff --git a/trieste/utils/misc.py b/trieste/utils/misc.py
index 88c1823513..f988a70da5 100644
--- a/trieste/utils/misc.py
+++ b/trieste/utils/misc.py
@@ -250,10 +250,12 @@ def get_value_for_tag(

     if mapping is None:
         return None, None
-    elif matched_tags := sorted(set(tags) & set(mapping.keys()), key=tags.index):
-        return matched_tags[0], mapping[matched_tags[0]]
     else:
-        raise ValueError(f"none of the tags '{tags}' found in mapping")
+        matched_tags = sorted(set(tags) & set(mapping.keys()), key=tags.index)
+        if matched_tags:
+            return matched_tags[0], mapping[matched_tags[0]]
+        else:
+            raise ValueError(f"none of the tags '{tags}' found in mapping")


 @dataclass(frozen=True)

From 59e0cab75e7849227e980121313675673ec2e542 Mon Sep 17 00:00:00 2001
From: Khurram Ghani
Date: Wed, 11 Oct 2023 10:14:54 +0100
Subject: [PATCH 13/33] Update test, ask_tell data not changed in-place

---
 tests/unit/test_ask_tell_optimization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_ask_tell_optimization.py b/tests/unit/test_ask_tell_optimization.py
index 67c26efaed..3b61090356 100644
--- a/tests/unit/test_ask_tell_optimization.py
+++ b/tests/unit/test_ask_tell_optimization.py
@@ -180,7 +180,7 @@ def test_ask_tell_optimizer_copies_state(
     ask_tell.tell(new_data)
     state_end: Record[None] = ask_tell.to_record(copy=copy)

-    assert_datasets_allclose(state_start.dataset, init_dataset if copy else init_dataset + new_data)
+    assert_datasets_allclose(state_start.dataset, init_dataset)
     assert_datasets_allclose(state_end.dataset, init_dataset + new_data)

     assert state_start.model is not model if copy else state_start.model is model

From e4a5342ada17775f31b905688aa5eb42f837a45b Mon Sep 17 00:00:00 2001
From: Khurram Ghani
Date: Wed, 11 Oct 2023 13:52:33 +0100
Subject: [PATCH 14/33] Add some test comments

---
 tests/unit/acquisition/test_rule.py      | 18 +++++++++++++++---
 tests/unit/test_ask_tell_optimization.py |  2 ++
 tests/unit/test_bayesian_optimizer.py    |  2 ++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py
index d75b30ac11..c3e064fc52 100644
--- a/tests/unit/acquisition/test_rule.py
+++ b/tests/unit/acquisition/test_rule.py
@@ -1545,7 +1545,18 @@ def test_multi_trust_region_box_acquire_with_state() -> None:
         npt.assert_allclose(subspace.eps, exp_eps)


-# Define a test case with multiple local models and multiple regions
+# Test case with multiple local models and multiple regions for batch trust regions.
+# It checks that the correct model is passed to each region, and that the correct dataset is
+# passed to each instance of the base rule (note: the base rule is deep-copied for each region).
+# This is done by mapping each region to a model. For each region the model has a local quadratic
+# shape with the minimum at the center of the region. The overall model is created by taking
+# a product of that model across all regions. The expected end result is that each region should
+# find its center after optimization. If the wrong model is being used by a region, then instead
+# it would find one of its boundaries.
+# Note that the implementation of this test is more general than strictly required.
It can support +# fewer models than regions (as long as the number of regions is a multiple of the number of +# models). However, currently trieste only supports either a global model or a one to one mapping +# between models and regions. @pytest.mark.parametrize("use_global_model", [True, False]) @pytest.mark.parametrize("use_global_dataset", [True, False]) @pytest.mark.parametrize("num_regions", [2, 4]) @@ -1591,6 +1602,7 @@ def test_multi_trust_region_box_with_multiple_models_and_regions( kernel = tfp.math.psd_kernels.ExponentiatedQuadratic(kernel_variance) + # Overall mean function is a product of local mean functions. def mean_function(x: TensorType, i: int = i) -> TensorType: return tf.reduce_prod( tf.stack( @@ -1602,7 +1614,6 @@ def mean_function(x: TensorType, i: int = i) -> TensorType: axis=0, ) - # mean_function = lambda x, i=i: quadratic(x - tf.cast(base_shift+i, dtype=x.dtype)) models[tag] = GaussianProcess([mean_function], [kernel], noise_variance) models[tag]._exp_dataset = ( # type: ignore[attr-defined] global_dataset if use_global_dataset else init_datasets[tag] @@ -1628,7 +1639,6 @@ def prepare_acquisition_function( return super().prepare_acquisition_function(model, dataset) base_rule = EfficientGlobalOptimization( # type: ignore[var-annotated] - # builder=ParallelContinuousThompsonSampling(), num_query_points=num_query_points builder=TestMultipleOptimismNegativeLowerConfidenceBound(search_space), num_query_points=num_query_points, ) @@ -1645,6 +1655,8 @@ def prepare_acquisition_function( npt.assert_allclose(points, exp_points) +# This test ensures that the datasets for each region are updated correctly. The datasets should +# contain filtered data, i.e. only points in the respective regions. @pytest.mark.parametrize( "datasets, exp_num_init_points", [ diff --git a/tests/unit/test_ask_tell_optimization.py b/tests/unit/test_ask_tell_optimization.py index 3b61090356..0f2c8e9f2b 100644 --- a/tests/unit/test_ask_tell_optimization.py +++ b/tests/unit/test_ask_tell_optimization.py @@ -438,6 +438,8 @@ def __deepcopy__(self, memo: dict[int, object]) -> _UncopyableModel: assert ask_tell.to_result(copy=False).final_result.is_ok +# Check that the correct dataset is routed to the model. +# Note: this test is almost identical to the one in test_bayesian_optimizer.py. @pytest.mark.parametrize("use_global_model", [True, False]) @pytest.mark.parametrize("use_global_init_dataset", [True, False]) @pytest.mark.parametrize("num_query_points_per_batch", [1, 2]) diff --git a/tests/unit/test_bayesian_optimizer.py b/tests/unit/test_bayesian_optimizer.py index d41b58c1fe..f50f479589 100644 --- a/tests/unit/test_bayesian_optimizer.py +++ b/tests/unit/test_bayesian_optimizer.py @@ -238,6 +238,8 @@ def __call__(self, x: tf.Tensor) -> Dataset: assert observer.call_count == steps +# Check that the correct dataset is routed to the model. +# Note: this test is almost identical to the one in test_ask_tell_optimization.py. 
@pytest.mark.parametrize("use_global_model", [True, False]) @pytest.mark.parametrize("use_global_init_dataset", [True, False]) @pytest.mark.parametrize("num_query_points_per_batch", [1, 2]) From 69a8590c01e21817d1412ddaa4f6b0e6f1d0a05e Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Wed, 11 Oct 2023 15:21:07 +0100 Subject: [PATCH 15/33] Add some rule comments --- trieste/acquisition/rule.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 3218fe50bd..af16c4bbc0 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -165,8 +165,6 @@ def update_datasets( # If a tag from tagged_output does not exist in datasets, then add it to # datasets by copying the dataset from datasets with the same tag-prefix. # Otherwise keep the existing dataset from datasets. - # TODO: this could mean that when we have a global model, the global dataset - # can contain multiple copies of the initial dataset. updated_datasets = {} for tag in new_datasets: _, dataset = get_value_for_tag(datasets, [tag, LocalTag.from_tag(tag).global_tag]) @@ -1273,12 +1271,16 @@ def update_datasets( ) -> Mapping[Tag, Dataset]: datasets = super().update_datasets(datasets, new_datasets) + # Filter out points that are not in any of the subspaces. This is done by creating a mask + # for each local dataset that is True for points that are in any subspace. used_masks = { tag: tf.zeros(dataset.query_points.shape[:-1], dtype=tf.bool) for tag, dataset in datasets.items() if LocalTag.from_tag(tag).is_local } - # TODO: using init_subspaces here is a bit of a hack, but it works for now. + # Using init_subspaces here relies on the users not creating new subspaces after + # initialization. This is a reasonable assumption for now, however a better solution would + # be to remove this assumption. 
assert self._init_subspaces is not None for subspace in self._init_subspaces: tag, in_region = subspace.get_dataset_filter_mask(datasets) From 40df5855bc38ed51386b319d8fffbee9760869f6 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Wed, 11 Oct 2023 17:43:23 +0100 Subject: [PATCH 16/33] Allow input-multi-observers for batch observer --- .../integration/test_ask_tell_optimization.py | 2 +- tests/unit/acquisition/test_rule.py | 2 +- tests/unit/objectives/test_utils.py | 57 +++++++++++-------- tests/unit/test_ask_tell_optimization.py | 2 +- trieste/acquisition/rule.py | 18 ++++-- trieste/bayesian_optimizer.py | 2 +- trieste/objectives/utils.py | 42 +++++++------- 7 files changed, 70 insertions(+), 55 deletions(-) diff --git a/tests/integration/test_ask_tell_optimization.py b/tests/integration/test_ask_tell_optimization.py index 185dcb2097..c1460612ef 100644 --- a/tests/integration/test_ask_tell_optimization.py +++ b/tests/integration/test_ask_tell_optimization.py @@ -201,7 +201,7 @@ def _test_ask_tell_optimization_finds_minima( search_space = ScaledBranin.search_space initial_query_points = search_space.sample(5) observer = mk_observer(ScaledBranin.objective if optimize_branin else SimpleQuadratic.objective) - batch_observer = mk_batch_observer(observer, OBJECTIVE) + batch_observer = mk_batch_observer(observer) initial_data = observer(initial_query_points) model = GaussianProcessRegression( diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index c3e064fc52..730fa3c096 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -1699,7 +1699,7 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( ) rule = BatchTrustRegionBox(subspaces, base_rule) _, points = rule.acquire(search_space, models, datasets)(None) - observer = mk_batch_observer(quadratic, OBJECTIVE) + observer = mk_batch_observer(quadratic) new_data = observer(points) assert not isinstance(new_data, Dataset) datasets = rule.update_datasets(datasets, new_data) diff --git a/tests/unit/objectives/test_utils.py b/tests/unit/objectives/test_utils.py index 98e6538ae6..df25f1b034 100644 --- a/tests/unit/objectives/test_utils.py +++ b/tests/unit/objectives/test_utils.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Callable, Union +from typing import Callable, Sequence, Union import numpy.testing as npt import pytest @@ -19,7 +19,7 @@ from trieste.data import Dataset from trieste.objectives.utils import mk_batch_observer, mk_multi_observer, mk_observer -from trieste.observer import SingleObserver +from trieste.observer import Observer from trieste.types import Tag, TensorType from trieste.utils.misc import LocalTag @@ -58,40 +58,49 @@ def test_mk_multi_observer() -> None: npt.assert_array_equal(ys["bar"].observations, x_ - 1) -def test_mk_batch_observer_raises_on_multi_observer() -> None: - observer = mk_batch_observer(mk_multi_observer(foo=lambda x: x + 1, bar=lambda x: x - 1)) - with pytest.raises(ValueError, match="mk_batch_observer does not support multi-observers"): - observer(tf.constant([[[3.0]]])) - - -@pytest.mark.parametrize("input_objective", [lambda x: x, lambda x: Dataset(x, x)]) +@pytest.mark.parametrize( + "input_objective, keys", + [ + (lambda x: x, ["baz"]), + (lambda x: Dataset(x, x), ["baz"]), + (mk_multi_observer(foo=lambda x: x + 1, bar=lambda x: x - 1), ["foo", "bar"]), + ], +) @pytest.mark.parametrize("batch_size", [1, 2, 3]) @pytest.mark.parametrize("num_query_points_per_batch", [1, 2]) -@pytest.mark.parametrize("key", [None, "bar"]) def test_mk_batch_observer( - input_objective: Union[Callable[[TensorType], TensorType], SingleObserver], + input_objective: Union[Callable[[TensorType], TensorType], Observer], + keys: Sequence[Tag], batch_size: int, num_query_points_per_batch: int, - key: Tag, ) -> None: x_ = tf.reshape( tf.constant(range(batch_size * num_query_points_per_batch), tf.float64), (num_query_points_per_batch, batch_size, 1), ) - ys = mk_batch_observer(input_objective, key)(x_) - - if key is None: - assert isinstance(ys, Dataset) - npt.assert_array_equal(ys.query_points, tf.reshape(x_, [-1, 1])) - npt.assert_array_equal(ys.observations, tf.reshape(x_, [-1, 1])) - else: - assert isinstance(ys, dict) - exp_keys = {LocalTag(key, i).tag for i in range(batch_size)} + ys = mk_batch_observer(input_objective, "baz")(x_) + + assert isinstance(ys, dict) + + # Check keys. + exp_keys = set() + for key in keys: + exp_keys.update({LocalTag(key, i).tag for i in range(batch_size)}) exp_keys.add(key) + assert ys.keys() == exp_keys + + # Check datasets. + for key in keys: + # Different observers (in parameterize above) return different observation values. 
+ if key == "foo": + exp_o = x_ + 1 + elif key == "bar": + exp_o = x_ - 1 + else: + exp_o = x_ - assert ys.keys() == exp_keys npt.assert_array_equal(ys[key].query_points, tf.reshape(x_, [-1, 1])) - npt.assert_array_equal(ys[key].observations, tf.reshape(x_, [-1, 1])) + npt.assert_array_equal(ys[key].observations, tf.reshape(exp_o, [-1, 1])) for i in range(batch_size): npt.assert_array_equal(ys[LocalTag(key, i)].query_points, x_[:, i]) - npt.assert_array_equal(ys[LocalTag(key, i)].observations, x_[:, i]) + npt.assert_array_equal(ys[LocalTag(key, i)].observations, exp_o[:, i]) diff --git a/tests/unit/test_ask_tell_optimization.py b/tests/unit/test_ask_tell_optimization.py index 0f2c8e9f2b..3660f87171 100644 --- a/tests/unit/test_ask_tell_optimization.py +++ b/tests/unit/test_ask_tell_optimization.py @@ -500,7 +500,7 @@ def update(self, dataset: Dataset) -> None: for tag, model in models.items(): model._tag = tag - observer = mk_batch_observer(lambda x: Dataset(x, x), OBJECTIVE) + observer = mk_batch_observer(lambda x: Dataset(x, x)) rule = FixedAcquisitionRule(query_points) ask_tell = AskTellOptimizer(search_space, init_data, models, rule) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index af16c4bbc0..d7a2592dad 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -158,13 +158,19 @@ def update_datasets( :param new_datasets: The new datasets. :return: The updated datasets. """ - # Account for the case where there may be an initial dataset that is not tagged - # per region. In this case, only the global dataset will exist in datasets. We - # want to copy this initial dataset to all the regions. - # + # In order to support local datasets, account for the case where there may be an initial + # dataset that is not tagged per region. In this case, only the global dataset will exist + # in datasets. We want to copy this initial dataset to all the regions. # If a tag from tagged_output does not exist in datasets, then add it to - # datasets by copying the dataset from datasets with the same tag-prefix. - # Otherwise keep the existing dataset from datasets. + # datasets by copying the data from datasets with the same global tag. Otherwise keep the + # existing data from datasets. + # + # Note: this replication of initial data can potentially cause an issue when a global model + # is being used with local datasets, as the points may be repeated. This will only be an + # issue if two regions overlap and both contain that initial data-point -- as filtering + # (in BatchTrustRegion) would otherwise remove duplicates. The main way to avoid the issue + # in this scenario is to provide local initial datasets, instead of a global initial + # dataset. updated_datasets = {} for tag in new_datasets: _, dataset = get_value_for_tag(datasets, [tag, LocalTag.from_tag(tag).global_tag]) diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index 82a9a317c2..f17f915880 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -764,7 +764,7 @@ def optimize( observer = self._observer # If query_points are rank 3, then use a batched observer. 
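        # (Batched query points have shape [n_points, batch_size, n_dims]; the batch
        # observer splits its results into per-region datasets keyed by local tags.)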
if tf.rank(query_points) == 3: - observer = mk_batch_observer(observer, OBJECTIVE) + observer = mk_batch_observer(observer) observer_output = observer(query_points) tagged_output = ( diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index 0984139381..2816374b4f 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -26,7 +26,7 @@ from check_shapes import check_shapes from ..data import Dataset -from ..observer import MultiObserver, Observer, SingleObserver +from ..observer import OBJECTIVE, MultiObserver, Observer, SingleObserver from ..types import Tag, TensorType from ..utils.misc import LocalTag @@ -64,21 +64,18 @@ def mk_multi_observer(**kwargs: Callable[[TensorType], TensorType]) -> MultiObse def mk_batch_observer( - objective_or_observer: Union[Callable[[TensorType], TensorType], SingleObserver], - key: Optional[Tag] = None, + objective_or_observer: Union[Callable[[TensorType], TensorType], Observer], + default_key: Tag = OBJECTIVE, ) -> Observer: """ Create an observer that returns the data from ``objective`` or an existing ``observer`` separately for each query point in a batch. - :param objective_or_observer: An objective or an existing observer designed to be used with a - single data set and model. - :param key: An optional key to use to access the data from the observer result. + :param objective_or_observer: An objective or an existing observer. + :param default_key: The default key to use if ``objective_or_observer`` is an objective or + does not return a mapping. :return: A multi-observer across the batch dimension of query points, returning the data from - ``objective``. If ``key`` is provided, the observer will be a mapping. Otherwise, it will - return a single dataset. - :raise ValueError (or tf.errors.InvalidArgumentError): If ``objective_or_observer`` is a - multi-observer. + ``objective_or_observer``. """ @check_shapes("qps: [n_points, batch_size, n_dims]") @@ -92,23 +89,26 @@ def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: qps = tf.reshape(qps, [-1, qps.shape[-1]]) obs_or_dataset = objective_or_observer(qps) - if isinstance(obs_or_dataset, Mapping): - raise ValueError("mk_batch_observer does not support multi-observers") - elif not isinstance(obs_or_dataset, Dataset): + if not isinstance(obs_or_dataset, (Mapping, Dataset)): + # Just a single observation, so wrap in a dataset. obs_or_dataset = Dataset(qps, obs_or_dataset) - if key is None: - # Always use rank 2 shape as models (e.g. GPR) expect this, so return as is. - return obs_or_dataset - else: + if isinstance(obs_or_dataset, Dataset): + # Convert to a mapping with a default key. + obs_or_dataset = {default_key: obs_or_dataset} + + datasets = {} + for key, dataset in obs_or_dataset.items(): # Include overall dataset and per batch dataset. 
- obs = obs_or_dataset.observations + obs = dataset.observations qps = tf.reshape(qps, [-1, batch_size, qps.shape[-1]]) obs = tf.reshape(obs, [-1, batch_size, obs.shape[-1]]) - datasets: Mapping[Tag, Dataset] = { - key: obs_or_dataset, + _datasets = { + key: dataset, **{LocalTag(key, i): Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)}, } - return datasets + datasets.update(_datasets) + + return datasets return _observer From ee1ee568f39f2793527322ad2517a88c1eb75aae Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Wed, 11 Oct 2023 21:23:28 +0100 Subject: [PATCH 17/33] Allow multiple models/datasets for base rule --- tests/unit/acquisition/test_rule.py | 4 +- trieste/acquisition/rule.py | 189 ++++++++++++++++------------ 2 files changed, 112 insertions(+), 81 deletions(-) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index 730fa3c096..5d17465fe1 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -1213,7 +1213,7 @@ def test_trust_region_box_get_dataset_min() -> None: trb = SingleObjectiveTrustRegionBox(search_space) trb._lower = tf.constant([0.2, 0.2], dtype=tf.float64) trb._upper = tf.constant([0.7, 0.7], dtype=tf.float64) - x_min, y_min = trb.get_dataset_min(dataset) + x_min, y_min = trb.get_dataset_min({"foo": dataset}) npt.assert_array_equal(x_min, tf.constant([0.3, 0.4], dtype=tf.float64)) npt.assert_array_equal(y_min, tf.constant([0.2], dtype=tf.float64)) @@ -1227,7 +1227,7 @@ def test_trust_region_box_get_dataset_min_outside_search_space() -> None: tf.constant([[0.7], [0.9]], dtype=tf.float64), ) trb = SingleObjectiveTrustRegionBox(search_space) - x_min, y_min = trb.get_dataset_min(dataset) + x_min, y_min = trb.get_dataset_min({"foo": dataset}) npt.assert_array_equal(x_min, tf.constant([1.2, 1.3], dtype=tf.float64)) npt.assert_array_equal(y_min, tf.constant([np.inf], dtype=tf.float64)) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index d7a2592dad..af108735df 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -20,9 +20,22 @@ import copy import math from abc import ABC, abstractmethod +from collections import defaultdict from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Callable, Generic, Optional, Sequence, Tuple, TypeVar, Union, cast, overload +from typing import ( + Any, + Callable, + Dict, + Generic, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + cast, + overload, +) import numpy as np from check_shapes import check_shapes, inherit_check_shapes @@ -1005,51 +1018,53 @@ def update( """ ... - def select_model( + def select_models( self, models: Optional[Mapping[Tag, ProbabilisticModelType]] - ) -> Tuple[Optional[Tag], Optional[ProbabilisticModelType]]: + ) -> Optional[Mapping[Tag, ProbabilisticModelType]]: """ - Select a single model belonging to this region. This is an optional method that is - only required if the region is used with single model acquisition functions. + Select models belonging to this region for acquisition. :param models: The model for each tag. - :return: The model belonging to this region. + :return: The models belonging to this region. """ - # By default return the OBJECTIVE model. - return get_value_for_tag(models, OBJECTIVE) + # By default return all the models. 
+ return models - def select_dataset( + def select_datasets( self, datasets: Optional[Mapping[Tag, Dataset]] - ) -> Tuple[Optional[Tag], Optional[Dataset]]: + ) -> Optional[Mapping[Tag, Dataset]]: """ - Select a single dataset belonging to this region. This is an optional method that is - only required if the region is used with single model acquisition functions. + Select datasets belonging to this region for acquisition. :param datasets: The dataset for each tag. - :return: The tag and associated dataset belonging to this region. + :return: The datasets belonging to this region. """ - # By default return the OBJECTIVE dataset. - return get_value_for_tag(datasets, OBJECTIVE) + # By default return all the datasets. + return datasets - def get_dataset_filter_mask( + def get_datasets_filter_mask( self, datasets: Optional[Mapping[Tag, Dataset]] - ) -> Tuple[Optional[Tag], Optional[tf.Tensor]]: + ) -> Optional[Mapping[Tag, tf.Tensor]]: """ - Return a boolean mask that can be used to filter out points from the dataset that + Return a boolean mask that can be used to filter out points from the datasets that belong to this region. :param datasets: The dataset for each tag. - :return: The tag for the selected dataset and a boolean mask that can be used to filter - that dataset. A value of `True` indicates that the corresponding point should be kept. + :return: A mapping for each tag belonging to this region, to a boolean mask that can be + used to filter out points from the datasets. A value of `True` indicates that the + corresponding point should be kept. """ - # Always select the region dataset for filtering. Don't directly filter the global dataset. + # Only select the region datasets for filtering. Don't directly filter the global dataset. assert self.index is not None, "the index should be set for filtering local datasets" - tag, dataset = get_value_for_tag(datasets, LocalTag(OBJECTIVE, self.index)) - if dataset is None: - return None, None + if datasets is None: + return None else: # By default return a mask that filters nothing. 
- return tag, tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) + return { + tag: tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) + for tag, dataset in datasets.items() + if LocalTag.from_tag(tag).local_index == self.index + } UpdatableTrustRegionType = TypeVar("UpdatableTrustRegionType", bound=UpdatableTrustRegion) @@ -1143,27 +1158,28 @@ def acquire( assert self._tags is not None assert self._init_subspaces is not None - num_subspaces = len(self._tags) - num_local_models = 0 + num_local_models: Dict[Tag, int] = defaultdict(int) for tag in models: ltag = LocalTag.from_tag(tag) - if ltag.is_local and ltag.global_tag == OBJECTIVE: - num_local_models += 1 - assert num_local_models in [0, num_subspaces], ( + if ltag.is_local: + num_local_models[ltag.global_tag] += 1 + num_local_models_vals = set(num_local_models.values()) + assert ( + len(num_local_models_vals) <= 1 + ), f"The number of local models should be the same for all tags, got {num_local_models}" + _num_local_models = 0 if len(num_local_models_vals) == 0 else num_local_models_vals.pop() + + num_subspaces = len(self._tags) + assert _num_local_models in [0, num_subspaces], ( f"When using local models, the number of subspaces {num_subspaces} should be equal to " - f"the number of local objective models {num_local_models}" + f"the number of local objective models {_num_local_models}" ) - # If the base rule is a single model acquisition rule, but we have local - # models, run the (deepcopied) base rule sequentially for each subspace. + # If we have local models, run the (deepcopied) base rule sequentially for each subspace. # Otherwise, run the base rule as is, once with all models and datasets. # Note: this should only trigger on the first call to `acquire`, as after that we will # have a list of rules in `self._rules`. - if ( - isinstance(self._rule, EfficientGlobalOptimization) - and hasattr(self._rule._builder, "single_builder") - and (num_local_models > 0 or OBJECTIVE not in models) - ): + if _num_local_models > 0: self._rules = [copy.deepcopy(self._rule) for _ in range(num_subspaces)] def state_func( @@ -1209,16 +1225,20 @@ def state_func( if self._rules is not None: _points = [] for subspace, rule in zip(subspaces, self._rules): - _, _model = subspace.select_model(models) - _, _dataset = subspace.select_dataset(datasets) - assert _model is not None - # Using default tag, as that is what single model acquisition builders expect. - model = {OBJECTIVE: _model} - if _dataset is None: - dataset = None - else: - dataset = {OBJECTIVE: _dataset} - _points.append(rule.acquire(subspace, model, dataset)) + _models = subspace.select_models(models) + _datasets = subspace.select_datasets(datasets) + assert _models is not None + # Remap all local tags to global ones. One reason is that single model + # acquisition builders expect OBJECTIVE to exist. + _models = { + LocalTag.from_tag(tag).global_tag: model for tag, model in _models.items() + } + if _datasets is not None: + _datasets = { + LocalTag.from_tag(tag).global_tag: dataset + for tag, dataset in _datasets.items() + } + _points.append(rule.acquire(subspace, _models, _datasets)) points = tf.stack(_points, axis=1) else: points = self._rule.acquire(acquisition_space, models, datasets) @@ -1289,11 +1309,12 @@ def update_datasets( # be to remove this assumption. 
assert self._init_subspaces is not None for subspace in self._init_subspaces: - tag, in_region = subspace.get_dataset_filter_mask(datasets) - assert tag is not None - ltag = LocalTag.from_tag(tag) - assert ltag.is_local, f"can only filter local tags, got {tag}" - used_masks[tag] = tf.logical_or(used_masks[tag], in_region) + in_region_masks = subspace.get_datasets_filter_mask(datasets) + if in_region_masks is not None: + for tag, in_region in in_region_masks.items(): + ltag = LocalTag.from_tag(tag) + assert ltag.is_local, f"can only filter local tags, got {tag}" + used_masks[tag] = tf.logical_or(used_masks[tag], in_region) filtered_datasets = {} global_tags = [] # Global datasets to re-generate. @@ -1381,7 +1402,7 @@ def initialize( Initialize the box by sampling a location from the global search space and setting the bounds. """ - _, dataset = self.select_dataset(datasets) + dataset = self.select_datasets(datasets) self.location = tf.squeeze(self.global_search_space.sample(1), axis=0) self._step_is_success = False @@ -1405,7 +1426,7 @@ def update( ``1 / beta``. Conversely, if it was unsuccessful, the size is reduced by the factor ``beta``. """ - _, dataset = self.select_dataset(datasets) + dataset = self.select_datasets(datasets) if tf.reduce_any(self.eps < self._min_eps): self.initialize(models, datasets) @@ -1420,22 +1441,21 @@ def update( self._update_bounds() self._y_min = y_min - def select_model( + def select_models( self, models: Optional[Mapping[Tag, ProbabilisticModelType]] - ) -> Tuple[Optional[Tag], Optional[ProbabilisticModelType]]: - # Select the model belonging to this box. Note there isn't necessarily a one-to-one - # mapping between regions and models. + ) -> Optional[Mapping[Tag, ProbabilisticModelType]]: + # Select the model belonging to this box. if self.index is None: tags = [OBJECTIVE] # If no index, then pick the global model. else: tags = [LocalTag(OBJECTIVE, self.index), OBJECTIVE] # Prefer local model if available. - return get_value_for_tag(models, tags) + tag, model = get_value_for_tag(models, tags) + return {tag: model} if model is not None else None - def select_dataset( + def select_datasets( self, datasets: Optional[Mapping[Tag, Dataset]] - ) -> Tuple[Optional[Tag], Optional[Dataset]]: - # Select the dataset belonging to this box. Note there isn't necessarily a one-to-one - # mapping between regions and datasets. + ) -> Optional[Mapping[Tag, Dataset]]: + # Select the dataset belonging to this box. if self.index is None: tags = [OBJECTIVE] # If no index, then pick the global dataset. else: @@ -1443,28 +1463,35 @@ def select_dataset( LocalTag(OBJECTIVE, self.index), OBJECTIVE, ] # Prefer local dataset if available. - return get_value_for_tag(datasets, tags) + tag, dataset = get_value_for_tag(datasets, tags) + return {tag: dataset} if dataset is not None else None - def get_dataset_filter_mask( + def get_datasets_filter_mask( self, datasets: Optional[Mapping[Tag, Dataset]] - ) -> Tuple[Optional[Tag], Optional[tf.Tensor]]: - # Always select the region dataset for filtering. Don't directly filter the global dataset. + ) -> Optional[Mapping[Tag, tf.Tensor]]: + # Only select the region datasets for filtering. Don't directly filter the global dataset. assert self.index is not None, "the index should be set for filtering local datasets" - tag, dataset = get_value_for_tag(datasets, LocalTag(OBJECTIVE, self.index)) - if dataset is None: - return None, None + if datasets is None: + return None else: # Only keep points that are in the box. 
- return tag, self.contains(dataset.query_points) + return { + tag: self.contains(dataset.query_points) + for tag, dataset in datasets.items() + if LocalTag.from_tag(tag).local_index == self.index + } @check_shapes( "return[0]: [D]", "return[1]: []", ) - def get_dataset_min(self, dataset: Optional[Dataset]) -> Tuple[TensorType, TensorType]: + def get_dataset_min( + self, datasets: Optional[Mapping[Tag, Dataset]] + ) -> Tuple[TensorType, TensorType]: """Calculate the minimum of the box using the given dataset.""" - if dataset is None: + if datasets is None: raise ValueError("""dataset must be provided""") + dataset = next(iter(datasets.values())) # Expect only one dataset. in_tr = self.contains(dataset.query_points) in_tr_obs = tf.where( @@ -1603,17 +1630,21 @@ def initialize( super().initialize(models, datasets) - def get_dataset_filter_mask( + def get_datasets_filter_mask( self, datasets: Optional[Mapping[Tag, Dataset]] - ) -> Tuple[Optional[Tag], Optional[tf.Tensor]]: + ) -> Optional[Mapping[Tag, tf.Tensor]]: # Don't filter out any points from the dataset by bypassing the # SingleObjectiveTrustRegionBox method. - return super(SingleObjectiveTrustRegionBox, self).get_dataset_filter_mask(datasets) + return super(SingleObjectiveTrustRegionBox, self).get_datasets_filter_mask(datasets) @inherit_check_shapes - def get_dataset_min(self, dataset: Optional[Dataset]) -> Tuple[TensorType, TensorType]: - if dataset is None: + def get_dataset_min( + self, datasets: Optional[Mapping[Tag, Dataset]] + ) -> Tuple[TensorType, TensorType]: + """Calculate the minimum of the box using the given dataset.""" + if datasets is None: raise ValueError("""dataset must be provided""") + dataset = next(iter(datasets.values())) # Expect only one dataset. # Always return the global minimum. 
ix = tf.argmin(dataset.observations) From 7d03a04c7a59eafe075f1585b87f40939a037b2b Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Thu, 12 Oct 2023 12:16:59 +0100 Subject: [PATCH 18/33] Support multiple models/datasets in region selects --- tests/unit/acquisition/test_rule.py | 34 +++++-- trieste/acquisition/rule.py | 150 +++++++++++++++++----------- 2 files changed, 115 insertions(+), 69 deletions(-) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index 5d17465fe1..ad481ad9f1 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -545,16 +545,23 @@ def test_async_keeps_track_of_pending_points( npt.assert_allclose(state.pending_points, tf.concat([point2, point3], axis=0)) -@pytest.mark.parametrize("datasets", [{}, {"foo": empty_dataset([1], [1])}]) +@pytest.mark.parametrize( + "datasets", + [ + {}, + {"foo": empty_dataset([1], [1])}, + {OBJECTIVE: empty_dataset([1], [1]), "foo": empty_dataset([1], [1])}, + ], +) @pytest.mark.parametrize( "models", [{}, {"foo": QuadraticMeanAndRBFKernel()}, {OBJECTIVE: QuadraticMeanAndRBFKernel()}] ) def test_trego_raises_for_missing_datasets_key( - datasets: dict[Tag, Dataset], models: dict[Tag, ProbabilisticModel] + datasets: Mapping[Tag, Dataset], models: dict[Tag, ProbabilisticModel] ) -> None: search_space = Box([-1], [1]) rule = BatchTrustRegionBox(TREGOBox(search_space)) # type: ignore[var-annotated] - with pytest.raises(ValueError, match="none of the tags '.LocalTag.OBJECTIVE, 0., "): + with pytest.raises(ValueError, match="a single OBJECTIVE dataset must be provided"): rule.acquire(search_space, models, datasets=datasets)(None) @@ -1195,12 +1202,21 @@ def test_turbo_state_deepcopy() -> None: npt.assert_allclose(tr_state_copy.y_min, tr_state.y_min) -# get_dataset_min raises if dataset is None. -def test_trust_region_box_get_dataset_min_raises_if_dataset_is_none() -> None: +@pytest.mark.parametrize( + "datasets", + [ + {}, + {"foo": empty_dataset([1], [1])}, + {OBJECTIVE: empty_dataset([1], [1]), "foo": empty_dataset([1], [1])}, + ], +) +def test_trust_region_box_get_dataset_min_raises_if_dataset_is_faulty( + datasets: Mapping[Tag, Dataset] +) -> None: search_space = Box([0.0, 0.0], [1.0, 1.0]) trb = SingleObjectiveTrustRegionBox(search_space) - with pytest.raises(ValueError, match="dataset must be provided"): - trb.get_dataset_min(None) + with pytest.raises(ValueError, match="a single OBJECTIVE dataset must be provided"): + trb.get_dataset_min(datasets) # get_dataset_min picks the minimum x and y values from the dataset. 
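A minimal sketch of the reworked interface exercised by these tests, assuming the imports and tag values used in the test module:

    import tensorflow as tf
    from trieste.acquisition.rule import SingleObjectiveTrustRegionBox
    from trieste.data import Dataset
    from trieste.observer import OBJECTIVE
    from trieste.space import Box

    trb = SingleObjectiveTrustRegionBox(Box([0.0, 0.0], [1.0, 1.0]))
    dataset = Dataset(
        tf.constant([[0.3, 0.4]], dtype=tf.float64),
        tf.constant([[0.2]], dtype=tf.float64),
    )
    # get_dataset_min now expects a tagged mapping containing a single OBJECTIVE
    # dataset, rather than a bare dataset.
    x_min, y_min = trb.get_dataset_min({OBJECTIVE: dataset})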
@@ -1213,7 +1229,7 @@ def test_trust_region_box_get_dataset_min() -> None: trb = SingleObjectiveTrustRegionBox(search_space) trb._lower = tf.constant([0.2, 0.2], dtype=tf.float64) trb._upper = tf.constant([0.7, 0.7], dtype=tf.float64) - x_min, y_min = trb.get_dataset_min({"foo": dataset}) + x_min, y_min = trb.get_dataset_min({OBJECTIVE: dataset}) npt.assert_array_equal(x_min, tf.constant([0.3, 0.4], dtype=tf.float64)) npt.assert_array_equal(y_min, tf.constant([0.2], dtype=tf.float64)) @@ -1227,7 +1243,7 @@ def test_trust_region_box_get_dataset_min_outside_search_space() -> None: tf.constant([[0.7], [0.9]], dtype=tf.float64), ) trb = SingleObjectiveTrustRegionBox(search_space) - x_min, y_min = trb.get_dataset_min({"foo": dataset}) + x_min, y_min = trb.get_dataset_min({OBJECTIVE: dataset}) npt.assert_array_equal(x_min, tf.constant([1.2, 1.3], dtype=tf.float64)) npt.assert_array_equal(y_min, tf.constant([np.inf], dtype=tf.float64)) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index af108735df..3e0a364c24 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -30,6 +30,7 @@ Generic, Optional, Sequence, + Set, Tuple, TypeVar, Union, @@ -1018,6 +1019,23 @@ def update( """ ... + def _get_tags(self, tags: Set[Tag]) -> Tuple[Set[Tag], Set[Tag]]: + # Separate tags into local (matching index) and global tags (without matching + # local tag). + local_gtags = set() + global_tags = set() + for tag in tags: + ltag = LocalTag.from_tag(tag) + if not ltag.is_local: + global_tags.add(tag) + elif ltag.local_index == self.index: + local_gtags.add(ltag.global_tag) + + # Only keep global tags that don't have a matching local tag. + global_tags = global_tags.difference(local_gtags) + + return local_gtags, global_tags + def select_models( self, models: Optional[Mapping[Tag, ProbabilisticModelType]] ) -> Optional[Mapping[Tag, ProbabilisticModelType]]: @@ -1027,8 +1045,25 @@ def select_models( :param models: The model for each tag. :return: The models belonging to this region. """ - # By default return all the models. - return models + if models is None: + _models = {} + elif self.index is None: + # If no index, then return the global models. + _models = { + tag: model for tag, model in models.items() if not LocalTag.from_tag(tag).is_local + } + else: + # Prefer matching local model for each tag, otherwise select the global model. + local_gtags, global_tags = self._get_tags(set(models)) + + _models = {} + for tag in local_gtags: + ltag = LocalTag(tag, self.index) + _models[ltag] = models[ltag] + for tag in global_tags: + _models[tag] = models[tag] + + return _models if _models else None def select_datasets( self, datasets: Optional[Mapping[Tag, Dataset]] @@ -1039,8 +1074,27 @@ def select_datasets( :param datasets: The dataset for each tag. :return: The datasets belonging to this region. """ - # By default return all the datasets. - return datasets + if datasets is None: + _datasets = {} + elif self.index is None: + # If no index, then return the global datasets. + _datasets = { + tag: dataset + for tag, dataset in datasets.items() + if not LocalTag.from_tag(tag).is_local + } + else: + # Prefer matching local dataset for each tag, otherwise select the global dataset. 
+ local_gtags, global_tags = self._get_tags(set(datasets)) + + _datasets = {} + for tag in local_gtags: + ltag = LocalTag(tag, self.index) + _datasets[ltag] = datasets[ltag] + for tag in global_tags: + _datasets[tag] = datasets[tag] + + return _datasets if _datasets else None def get_datasets_filter_mask( self, datasets: Optional[Mapping[Tag, Dataset]] @@ -1059,9 +1113,9 @@ def get_datasets_filter_mask( if datasets is None: return None else: - # By default return a mask that filters nothing. + # Only keep points that are in the box. return { - tag: tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) + tag: self.contains(dataset.query_points) for tag, dataset in datasets.items() if LocalTag.from_tag(tag).local_index == self.index } @@ -1172,7 +1226,7 @@ def acquire( num_subspaces = len(self._tags) assert _num_local_models in [0, num_subspaces], ( f"When using local models, the number of subspaces {num_subspaces} should be equal to " - f"the number of local objective models {_num_local_models}" + f"the number of local models {_num_local_models}" ) # If we have local models, run the (deepcopied) base rule sequentially for each subspace. @@ -1402,13 +1456,13 @@ def initialize( Initialize the box by sampling a location from the global search space and setting the bounds. """ - dataset = self.select_datasets(datasets) + datasets = self.select_datasets(datasets) self.location = tf.squeeze(self.global_search_space.sample(1), axis=0) self._step_is_success = False self._init_eps() self._update_bounds() - _, self._y_min = self.get_dataset_min(dataset) + _, self._y_min = self.get_dataset_min(datasets) def update( self, @@ -1426,13 +1480,13 @@ def update( ``1 / beta``. Conversely, if it was unsuccessful, the size is reduced by the factor ``beta``. """ - dataset = self.select_datasets(datasets) + datasets = self.select_datasets(datasets) if tf.reduce_any(self.eps < self._min_eps): self.initialize(models, datasets) return - x_min, y_min = self.get_dataset_min(dataset) + x_min, y_min = self.get_dataset_min(datasets) self.location = x_min tr_volume = tf.reduce_prod(self.upper - self.lower) @@ -1441,46 +1495,6 @@ def update( self._update_bounds() self._y_min = y_min - def select_models( - self, models: Optional[Mapping[Tag, ProbabilisticModelType]] - ) -> Optional[Mapping[Tag, ProbabilisticModelType]]: - # Select the model belonging to this box. - if self.index is None: - tags = [OBJECTIVE] # If no index, then pick the global model. - else: - tags = [LocalTag(OBJECTIVE, self.index), OBJECTIVE] # Prefer local model if available. - tag, model = get_value_for_tag(models, tags) - return {tag: model} if model is not None else None - - def select_datasets( - self, datasets: Optional[Mapping[Tag, Dataset]] - ) -> Optional[Mapping[Tag, Dataset]]: - # Select the dataset belonging to this box. - if self.index is None: - tags = [OBJECTIVE] # If no index, then pick the global dataset. - else: - tags = [ - LocalTag(OBJECTIVE, self.index), - OBJECTIVE, - ] # Prefer local dataset if available. - tag, dataset = get_value_for_tag(datasets, tags) - return {tag: dataset} if dataset is not None else None - - def get_datasets_filter_mask( - self, datasets: Optional[Mapping[Tag, Dataset]] - ) -> Optional[Mapping[Tag, tf.Tensor]]: - # Only select the region datasets for filtering. Don't directly filter the global dataset. - assert self.index is not None, "the index should be set for filtering local datasets" - if datasets is None: - return None - else: - # Only keep points that are in the box. 
- return { - tag: self.contains(dataset.query_points) - for tag, dataset in datasets.items() - if LocalTag.from_tag(tag).local_index == self.index - } - @check_shapes( "return[0]: [D]", "return[1]: []", @@ -1489,9 +1503,13 @@ def get_dataset_min( self, datasets: Optional[Mapping[Tag, Dataset]] ) -> Tuple[TensorType, TensorType]: """Calculate the minimum of the box using the given dataset.""" - if datasets is None: - raise ValueError("""dataset must be provided""") - dataset = next(iter(datasets.values())) # Expect only one dataset. + if ( + datasets is None + or len(datasets) != 1 + or LocalTag.from_tag(next(iter(datasets))).global_tag != OBJECTIVE + ): + raise ValueError("""a single OBJECTIVE dataset must be provided""") + dataset = next(iter(datasets.values())) in_tr = self.contains(dataset.query_points) in_tr_obs = tf.where( @@ -1633,18 +1651,30 @@ def initialize( def get_datasets_filter_mask( self, datasets: Optional[Mapping[Tag, Dataset]] ) -> Optional[Mapping[Tag, tf.Tensor]]: - # Don't filter out any points from the dataset by bypassing the - # SingleObjectiveTrustRegionBox method. - return super(SingleObjectiveTrustRegionBox, self).get_datasets_filter_mask(datasets) + # Only select the region datasets for filtering. Don't directly filter the global dataset. + assert self.index is not None, "the index should be set for filtering local datasets" + if datasets is None: + return None + else: + # Don't filter out any points from the dataset. Always keep the entire dataset. + return { + tag: tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) + for tag, dataset in datasets.items() + if LocalTag.from_tag(tag).local_index == self.index + } @inherit_check_shapes def get_dataset_min( self, datasets: Optional[Mapping[Tag, Dataset]] ) -> Tuple[TensorType, TensorType]: """Calculate the minimum of the box using the given dataset.""" - if datasets is None: - raise ValueError("""dataset must be provided""") - dataset = next(iter(datasets.values())) # Expect only one dataset. + if ( + datasets is None + or len(datasets) != 1 + or LocalTag.from_tag(next(iter(datasets))).global_tag != OBJECTIVE + ): + raise ValueError("""a single OBJECTIVE dataset must be provided""") + dataset = next(iter(datasets.values())) # Always return the global minimum. 
ix = tf.argmin(dataset.observations) From 8551d7275bdb3478ffc298a78fb5b44d29d6acd8 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Thu, 12 Oct 2023 17:23:56 +0100 Subject: [PATCH 19/33] Fix TR plotting history colors --- trieste/experimental/plotting/plotting.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/trieste/experimental/plotting/plotting.py b/trieste/experimental/plotting/plotting.py index 8faf81e865..cb9745722a 100644 --- a/trieste/experimental/plotting/plotting.py +++ b/trieste/experimental/plotting/plotting.py @@ -33,9 +33,11 @@ from trieste.acquisition import AcquisitionFunction from trieste.acquisition.multi_objective.dominance import non_dominated from trieste.bayesian_optimizer import FrozenRecord, Record, StateType +from trieste.observer import OBJECTIVE from trieste.space import TaggedMultiSearchSpace from trieste.types import TensorType from trieste.utils import to_numpy +from trieste.utils.misc import LocalTag def create_grid( @@ -588,9 +590,24 @@ def plot_trust_region_history_2d( if num_query_points is None: num_query_points = len(spaces) - query_points = history.dataset.query_points - new_points_mask = np.zeros(query_points.shape[0], dtype=bool) - new_points_mask[-num_query_points:] = True + query_points = history.dataset.query_points # All query points. + + # If there are local datasets, use them to generate the colors for the query points. + # Otherwise, use the global dataset and assume the last `num_query_points` points are new. + if len(history.datasets) > 1: + # Expect there to be an objective dataset for each subspace. + datasets = [history.datasets[LocalTag(OBJECTIVE, i)] for i in range(len(spaces))] + _new_points_mask = [ + np.zeros(dataset.query_points.shape[0], dtype=bool) for dataset in datasets + ] + # Last point in each dataset is the new point. + for mask in _new_points_mask: + mask[-1] = True + # Concatenate the masks. + new_points_mask = np.concatenate(_new_points_mask) + else: + new_points_mask = np.zeros(query_points.shape[0], dtype=bool) + new_points_mask[-num_query_points:] = True # Plot trust regions. colors = [rgb2hex(color) for color in cm.rainbow(np.linspace(0, 1, num_query_points))] From b571733ae2b38fe38741c2ca61bb4d04dd4cfc35 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Thu, 12 Oct 2023 17:51:14 +0100 Subject: [PATCH 20/33] Add notebook init points explanation --- docs/notebooks/trust_region.pct.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/notebooks/trust_region.pct.py b/docs/notebooks/trust_region.pct.py index 589b535360..27a2888e0e 100644 --- a/docs/notebooks/trust_region.pct.py +++ b/docs/notebooks/trust_region.pct.py @@ -242,7 +242,9 @@ def plot_history( # %% [markdown] # ### Visualizing batch trust region results # -# We visualize the results as before. +# We visualize the results as before. However, please note that the initial query points (crosses) are +# not highlighted in these plots. On each iteration, the batch trust region rule filters out points +# that are not in the regions anymore; so there isn't an easy way to track the initial points. 
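The plotting fix above keys the per-region datasets by their local tags. A small sketch of that convention, with behaviour as asserted in the LocalTag unit tests earlier in this series:

    from trieste.observer import OBJECTIVE
    from trieste.utils.misc import LocalTag

    tag = LocalTag(OBJECTIVE, 1)
    assert str(tag) == "OBJECTIVE__1"   # local tags render as "<global_tag>__<index>"
    assert tag.global_tag == OBJECTIVE  # and map back to their global tag
    assert LocalTag.from_tag("OBJECTIVE__1").local_index == 1
    assert not LocalTag.from_tag(OBJECTIVE).is_local  # plain tags have no local index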
# %% plot_final_result(dataset, num_init_points=0) From 2a934d1af9b8fa35c313a3979a6f2814d6ca8e5d Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Mon, 16 Oct 2023 16:47:08 +0100 Subject: [PATCH 21/33] Rename region index and add init param --- trieste/acquisition/rule.py | 44 ++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 3e0a364c24..23aa60ebbf 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -988,8 +988,13 @@ def acquire( class UpdatableTrustRegion(SearchSpace): """A search space that can be updated.""" - def __init__(self) -> None: - self.index: Optional[int] = None + def __init__(self, region_index: Optional[int] = None) -> None: + """ + :param region_index: The index of the region in a multi-region search space. This is used to + identify the local models and datasets to use for acquisition. If `None`, the + global models and datasets are used. + """ + self.region_index = region_index @abstractmethod def initialize( @@ -1028,7 +1033,7 @@ def _get_tags(self, tags: Set[Tag]) -> Tuple[Set[Tag], Set[Tag]]: ltag = LocalTag.from_tag(tag) if not ltag.is_local: global_tags.add(tag) - elif ltag.local_index == self.index: + elif ltag.local_index == self.region_index: local_gtags.add(ltag.global_tag) # Only keep global tags that don't have a matching local tag. @@ -1047,7 +1052,7 @@ def select_models( """ if models is None: _models = {} - elif self.index is None: + elif self.region_index is None: # If no index, then return the global models. _models = { tag: model for tag, model in models.items() if not LocalTag.from_tag(tag).is_local @@ -1058,7 +1063,7 @@ def select_models( _models = {} for tag in local_gtags: - ltag = LocalTag(tag, self.index) + ltag = LocalTag(tag, self.region_index) _models[ltag] = models[ltag] for tag in global_tags: _models[tag] = models[tag] @@ -1076,7 +1081,7 @@ def select_datasets( """ if datasets is None: _datasets = {} - elif self.index is None: + elif self.region_index is None: # If no index, then return the global datasets. _datasets = { tag: dataset @@ -1089,7 +1094,7 @@ def select_datasets( _datasets = {} for tag in local_gtags: - ltag = LocalTag(tag, self.index) + ltag = LocalTag(tag, self.region_index) _datasets[ltag] = datasets[ltag] for tag in global_tags: _datasets[tag] = datasets[tag] @@ -1109,7 +1114,9 @@ def get_datasets_filter_mask( corresponding point should be kept. """ # Only select the region datasets for filtering. Don't directly filter the global dataset. - assert self.index is not None, "the index should be set for filtering local datasets" + assert ( + self.region_index is not None + ), "the region_index should be set for filtering local datasets" if datasets is None: return None else: @@ -1117,7 +1124,7 @@ def get_datasets_filter_mask( return { tag: self.contains(dataset.query_points) for tag, dataset in datasets.items() - if LocalTag.from_tag(tag).local_index == self.index + if LocalTag.from_tag(tag).local_index == self.region_index } @@ -1173,7 +1180,7 @@ def __init__( init_subspaces = [init_subspaces] self._init_subspaces = tuple(init_subspaces) for index, subspace in enumerate(self._init_subspaces): - subspace.index = index + subspace.region_index = index # Override the index. 
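        # Use the stringified region indices as the subspace tags.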
self._tags = tuple([str(index) for index in range(len(init_subspaces))]) self._rule = rule @@ -1409,6 +1416,7 @@ def __init__( beta: float = 0.7, kappa: float = 1e-4, min_eps: float = 1e-2, + region_index: Optional[int] = None, ): """ Calculates the bounds of the box from the location/centre and global bounds. @@ -1419,6 +1427,9 @@ def __init__( considered a success. :param min_eps: The minimal size of the search space. If the size of the search space is smaller than this, the search space is reinitialized. + :param region_index: The index of the region in a multi-region search space. This is used to + identify the local models and datasets to use for acquisition. If `None`, the + global models and datasets are used. """ self._global_search_space = global_search_space @@ -1427,7 +1438,7 @@ def __init__( self._min_eps = min_eps super().__init__(global_search_space.lower, global_search_space.upper) - super(Box, self).__init__() + super(Box, self).__init__(region_index) @property def global_search_space(self) -> SearchSpace: @@ -1551,7 +1562,7 @@ def acquire( [SingleObjectiveTrustRegionBox(search_space) for _ in range(num_query_points)] ) for index, subspace in enumerate(self._init_subspaces): - subspace.index = index + subspace.region_index = index # Override the index. self._tags = tuple([str(index) for index in range(len(self._init_subspaces))]) # Ensure passed in global search space is always the same as the search space passed to @@ -1605,8 +1616,9 @@ def __init__( beta: float = 0.7, kappa: float = 1e-4, min_eps: float = 1e-2, + region_index: Optional[int] = None, ): - super().__init__(global_search_space, beta, kappa, min_eps) + super().__init__(global_search_space, beta, kappa, min_eps, region_index) self._is_global = False self._initialized = False @@ -1652,7 +1664,9 @@ def get_datasets_filter_mask( self, datasets: Optional[Mapping[Tag, Dataset]] ) -> Optional[Mapping[Tag, tf.Tensor]]: # Only select the region datasets for filtering. Don't directly filter the global dataset. - assert self.index is not None, "the index should be set for filtering local datasets" + assert ( + self.region_index is not None + ), "the region_index should be set for filtering local datasets" if datasets is None: return None else: @@ -1660,7 +1674,7 @@ def get_datasets_filter_mask( return { tag: tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) for tag, dataset in datasets.items() - if LocalTag.from_tag(tag).local_index == self.index + if LocalTag.from_tag(tag).local_index == self.region_index } @inherit_check_shapes From 52f797596a9b312d99a562cbbe3a885bc1a301d7 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Tue, 17 Oct 2023 11:10:51 +0100 Subject: [PATCH 22/33] Remove old comment --- trieste/objectives/utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index 2816374b4f..014b66feac 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -79,9 +79,6 @@ def mk_batch_observer( """ @check_shapes("qps: [n_points, batch_size, n_dims]") - # Note that the return type is not correct, but that is what mypy is happy with. It should be - # Mapping[Tag, Dataset] if key is not None, otherwise Dataset. - # One solution is to create two separate functions, but that will result in some duplicate code. def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: # Call objective with rank 2 query points by flattening batch dimension. # Some objectives might only expect rank 2 query points, so this is safer. 
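A short usage sketch of the batched observer as it stands at this point in the series (shapes per its check_shapes contract; the expected keys follow the unit tests, assuming the default OBJECTIVE key):

    import tensorflow as tf
    from trieste.data import Dataset
    from trieste.objectives.utils import mk_batch_observer

    observer = mk_batch_observer(lambda x: Dataset(x, x))
    # Rank 3 query points: [n_points, batch_size, n_dims].
    qps = tf.reshape(tf.range(6, dtype=tf.float64), [3, 2, 1])
    data = observer(qps)
    # One global dataset plus one local dataset per batch dimension.
    assert set(map(str, data)) == {"OBJECTIVE", "OBJECTIVE__0", "OBJECTIVE__1"}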
From b2eb66271e67cdf42de64f28030f0c7eb0d389af Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Tue, 17 Oct 2023 11:19:16 +0100 Subject: [PATCH 23/33] Tidy-up redundant expression --- trieste/utils/misc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trieste/utils/misc.py b/trieste/utils/misc.py index f988a70da5..04a16cfad5 100644 --- a/trieste/utils/misc.py +++ b/trieste/utils/misc.py @@ -266,8 +266,8 @@ class LocalTag: local_index: Optional[int] def __post_init__(self) -> None: - if self.is_local and (self.local_index is None or self.local_index < 0): - raise ValueError("local index must be non-negative") + if self.local_index is not None and self.local_index < 0: + raise ValueError(f"local index must be non-negative, got {self.local_index}") @property def is_local(self) -> bool: From 523aaabe143f68acd5e9c876a081a0f7e37130ee Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Wed, 18 Oct 2023 13:53:26 +0100 Subject: [PATCH 24/33] Keep full datasets along with filtered ones --- docs/notebooks/trust_region.pct.py | 25 ++++------- tests/unit/acquisition/test_rule.py | 21 +++++++-- trieste/acquisition/rule.py | 52 ++++++----------------- trieste/ask_tell_optimization.py | 28 ++++++++++-- trieste/bayesian_optimizer.py | 16 +++++-- trieste/experimental/plotting/plotting.py | 30 ++++++++++--- 6 files changed, 101 insertions(+), 71 deletions(-) diff --git a/docs/notebooks/trust_region.pct.py b/docs/notebooks/trust_region.pct.py index 27a2888e0e..140f9f28d4 100644 --- a/docs/notebooks/trust_region.pct.py +++ b/docs/notebooks/trust_region.pct.py @@ -106,9 +106,7 @@ def build_model(): from trieste.experimental.plotting import plot_bo_points, plot_function_2d -def plot_final_result( - _dataset: trieste.data.Dataset, num_init_points=num_initial_data_points -) -> None: +def plot_final_result(_dataset: trieste.data.Dataset) -> None: arg_min_idx = tf.squeeze(tf.argmin(_dataset.observations, axis=0)) query_points = _dataset.query_points.numpy() _, ax = plot_function_2d( @@ -119,7 +117,7 @@ def plot_final_result( contour=True, ) - plot_bo_points(query_points, ax[0, 0], num_init_points, arg_min_idx) + plot_bo_points(query_points, ax[0, 0], num_initial_data_points, arg_min_idx) plot_final_result(dataset) @@ -146,10 +144,7 @@ def plot_final_result( ) -def plot_history( - result: trieste.bayesian_optimizer.OptimizationResult, - num_init_points=num_initial_data_points, -) -> None: +def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: frames = [] for step, hist in enumerate( result.history + [result.final_result.unwrap()] @@ -159,7 +154,7 @@ def plot_history( search_space.lower, search_space.upper, hist, - num_init=num_init_points, + num_init=num_initial_data_points, ) if fig is not None: @@ -242,15 +237,13 @@ def plot_history( # %% [markdown] # ### Visualizing batch trust region results # -# We visualize the results as before. However, please note that the initial query points (crosses) are -# not highlighted in these plots. On each iteration, the batch trust region rule filters out points -# that are not in the regions anymore; so there isn't an easy way to track the initial points. +# We visualize the results as before. 
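The `__post_init__` simplification in the misc.py patch above is behaviour-preserving, since `is_local` is just `local_index is not None`, so the old `None` branch was unreachable. A quick sketch of the resulting validation (hypothetical session against the patched class):

    from trieste.utils.misc import LocalTag

    LocalTag("OBJECTIVE", 0)     # fine: the local tag "OBJECTIVE__0"
    LocalTag("OBJECTIVE", None)  # fine: a plain global tag
    try:
        LocalTag("OBJECTIVE", -1)
    except ValueError as e:
        print(e)  # local index must be non-negative, got -1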
# %% -plot_final_result(dataset, num_init_points=0) +plot_final_result(dataset) # %% -plot_history(result, num_init_points=0) +plot_history(result) # %% [markdown] # ## TEST @@ -285,10 +278,10 @@ def plot_history( dataset = result.try_get_final_dataset() # %% -plot_final_result(dataset, num_init_points=0) +plot_final_result(dataset) # %% -plot_history(result, num_init_points=0) +plot_history(result) # %% [markdown] # ## Trust region `TurBO` acquisition rule diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index ad481ad9f1..d810116121 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -69,7 +69,7 @@ from trieste.observer import OBJECTIVE from trieste.space import Box, SearchSpace, TaggedMultiSearchSpace from trieste.types import State, Tag, TensorType -from trieste.utils.misc import LocalTag +from trieste.utils.misc import LocalTag, get_value_for_tag def _line_search_maximize( @@ -795,7 +795,7 @@ def test_trego_always_uses_global_dataset() -> None: tf.constant([[0.5, -0.2], [0.7, 0.2], [1.1, 0.3], [0.5, 0.5]]), tf.constant([[0.7], [0.8], [0.9], [1.0]]), ) - updated_datasets = tr.update_datasets({"OBJECTIVE__0": dataset}, {"OBJECTIVE__0": new_data}) + updated_datasets = tr.filter_datasets({"OBJECTIVE__0": dataset + new_data}) # Both the local and global datasets should match. assert updated_datasets.keys() == {"OBJECTIVE", "OBJECTIVE__0"} @@ -1695,6 +1695,15 @@ def prepare_acquisition_function( }, 1, ), + ( + { + OBJECTIVE: mk_dataset([[-1.0]], [[-1.0]]), # Should be ignored. + "OBJECTIVE__0": mk_dataset([[0.0], [1.0]], [[1.0], [1.0]]), + "OBJECTIVE__1": mk_dataset([[2.0], [1.0]], [[1.0], [1.0]]), + "OBJECTIVE__2": mk_dataset([[2.0], [3.0]], [[1.0], [1.0]]), + }, + 1, + ), ], ) @pytest.mark.parametrize("num_query_points_per_region", [1, 2]) @@ -1718,7 +1727,13 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( observer = mk_batch_observer(quadratic) new_data = observer(points) assert not isinstance(new_data, Dataset) - datasets = rule.update_datasets(datasets, new_data) + + updated_datasets = {} + for tag in new_data: + _, dataset = get_value_for_tag(datasets, [tag, LocalTag.from_tag(tag).global_tag]) + assert dataset is not None + updated_datasets[tag] = dataset + new_data[tag] + datasets = rule.filter_datasets(updated_datasets) # Check local datasets. for i, subspace in enumerate(subspaces): diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 23aa60ebbf..c972b11688 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -64,7 +64,7 @@ from ..observer import OBJECTIVE from ..space import Box, SearchSpace, TaggedMultiSearchSpace from ..types import State, Tag, TensorType -from ..utils.misc import LocalTag, get_value_for_tag +from ..utils.misc import LocalTag from .function import ( BatchMonteCarloExpectedImprovement, ExpectedImprovement, @@ -162,35 +162,15 @@ def acquire_single( datasets=None if dataset is None else {OBJECTIVE: dataset}, ) - def update_datasets( - self, datasets: Mapping[Tag, Dataset], new_datasets: Mapping[Tag, Dataset] - ) -> Mapping[Tag, Dataset]: + def filter_datasets(self, datasets: Mapping[Tag, Dataset]) -> Mapping[Tag, Dataset]: """ - Update the datasets with new datasets. + Filter the datasets. - :param datasets: The current datasets. - :param new_datasets: The new datasets. - :return: The updated datasets. + :param datasets: The datasets to filter. + :return: The filtered datasets. 
""" - # In order to support local datasets, account for the case where there may be an initial - # dataset that is not tagged per region. In this case, only the global dataset will exist - # in datasets. We want to copy this initial dataset to all the regions. - # If a tag from tagged_output does not exist in datasets, then add it to - # datasets by copying the data from datasets with the same global tag. Otherwise keep the - # existing data from datasets. - # - # Note: this replication of initial data can potentially cause an issue when a global model - # is being used with local datasets, as the points may be repeated. This will only be an - # issue if two regions overlap and both contain that initial data-point -- as filtering - # (in BatchTrustRegion) would otherwise remove duplicates. The main way to avoid the issue - # in this scenario is to provide local initial datasets, instead of a global initial - # dataset. - updated_datasets = {} - for tag in new_datasets: - _, dataset = get_value_for_tag(datasets, [tag, LocalTag.from_tag(tag).global_tag]) - assert dataset is not None - updated_datasets[tag] = dataset + new_datasets[tag] - return updated_datasets + # No filtering by default. + return datasets class EfficientGlobalOptimization( @@ -1353,11 +1333,7 @@ def get_initialize_subspaces_mask( """ ... - def update_datasets( - self, datasets: Mapping[Tag, Dataset], new_datasets: Mapping[Tag, Dataset] - ) -> Mapping[Tag, Dataset]: - datasets = super().update_datasets(datasets, new_datasets) - + def filter_datasets(self, datasets: Mapping[Tag, Dataset]) -> Mapping[Tag, Dataset]: # Filter out points that are not in any of the subspaces. This is done by creating a mask # for each local dataset that is True for points that are in any subspace. used_masks = { @@ -1365,9 +1341,12 @@ def update_datasets( for tag, dataset in datasets.items() if LocalTag.from_tag(tag).is_local } + + # Global datasets to re-generate. + global_tags = {LocalTag.from_tag(tag).global_tag for tag in used_masks} + # Using init_subspaces here relies on the users not creating new subspaces after - # initialization. This is a reasonable assumption for now, however a better solution would - # be to remove this assumption. + # initialization. This is a reasonable assumption for now. assert self._init_subspaces is not None for subspace in self._init_subspaces: in_region_masks = subspace.get_datasets_filter_mask(datasets) @@ -1378,17 +1357,12 @@ def update_datasets( used_masks[tag] = tf.logical_or(used_masks[tag], in_region) filtered_datasets = {} - global_tags = [] # Global datasets to re-generate. for tag, used_mask in used_masks.items(): filtered_datasets[tag] = Dataset( tf.boolean_mask(datasets[tag].query_points, used_mask), tf.boolean_mask(datasets[tag].observations, used_mask), ) - ltag = LocalTag.from_tag(tag) - if ltag.global_tag not in global_tags: - global_tags.append(ltag.global_tag) - # Include global datasets. for gtag in global_tags: # Create global dataset from local datasets. 
This is done by concatenating the local diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py index c0ba3487ce..b42227ff12 100644 --- a/trieste/ask_tell_optimization.py +++ b/trieste/ask_tell_optimization.py @@ -205,6 +205,7 @@ def __init__( ) self._datasets = datasets + self._filtered_datasets = datasets self._models = models self._query_plot_dfs: dict[int, pd.DataFrame] = {} @@ -405,7 +406,7 @@ def ask(self) -> TensorType: with Timer() as query_point_generation_timer: points_or_stateful = self._acquisition_rule.acquire( - self._search_space, self._models, datasets=self._datasets + self._search_space, self._models, datasets=self._filtered_datasets ) if callable(points_or_stateful): @@ -446,13 +447,34 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None: f"match dataset keys {self._datasets.keys()}" ) - self._datasets = self._acquisition_rule.update_datasets(self._datasets, new_data) + # In order to support local datasets, account for the case where there may be an initial + # dataset that is not tagged per region. In this case, only the global dataset will exist + # in datasets. We want to copy this initial dataset to all the regions. + # If a tag from tagged_output does not exist in datasets, then add it to + # datasets by copying the data from datasets with the same global tag. Otherwise keep the + # existing data from datasets. + # + # Note: this replication of initial data can potentially cause an issue when a global model + # is being used with local datasets, as the points may be repeated. This will only be an + # issue if two regions overlap and both contain that initial data-point -- as filtering + # (in BatchTrustRegion) would otherwise remove duplicates. The main way to avoid the issue + # in this scenario is to provide local initial datasets, instead of a global initial + # dataset. + updated_datasets = {} + for tag, new_dataset in new_data.items(): + _, old_dataset = get_value_for_tag( + self._datasets, [tag, LocalTag.from_tag(tag).global_tag] + ) + assert old_dataset is not None + updated_datasets[tag] = old_dataset + new_dataset + self._datasets = updated_datasets + self._filtered_datasets = self._acquisition_rule.filter_datasets(updated_datasets) with Timer() as model_fitting_timer: for tag, model in self._models.items(): # Always use the matching dataset to the model. If the model is # local, then the dataset should be too by this stage. 
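A minimal sketch of the fallback-to-global behaviour described in the `tell` comment above, assuming a single global initial dataset and one incoming local dataset (the plain `OBJECTIVE__0`-style string tags stand in for the real local tags):

    import tensorflow as tf
    from trieste.data import Dataset

    datasets = {"OBJECTIVE": Dataset(tf.constant([[0.0]]), tf.constant([[1.0]]))}
    new_data = {"OBJECTIVE__0": Dataset(tf.constant([[0.2]]), tf.constant([[1.2]]))}

    for tag, new in new_data.items():
        # fall back to the matching global tag when no local entry exists yet
        old = datasets.get(tag, datasets[tag.split("__")[0]])
        datasets[tag] = old + new

    print(datasets["OBJECTIVE__0"].query_points.shape)  # (2, 1): the initial point is replicated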
-                dataset = self._datasets[tag]
+                dataset = self._filtered_datasets[tag]
                 model.update(dataset)
                 model.optimize_and_save_result(dataset)

diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py
index f17f915880..f9d2f232df 100644
--- a/trieste/bayesian_optimizer.py
+++ b/trieste/bayesian_optimizer.py
@@ -646,6 +646,7 @@ def optimize(
         if not isinstance(models, Mapping):
             models = {OBJECTIVE: models}

+        filtered_datasets = datasets
         # reassure the type checker that everything is tagged
         datasets = cast(Dict[Tag, Dataset], datasets)
         models = cast(Dict[Tag, TrainableProbabilisticModelType], models)
@@ -754,7 +755,7 @@ def optimize(
             with Timer() as total_step_wallclock_timer:
                 with Timer() as query_point_generation_timer:
                     points_or_stateful = acquisition_rule.acquire(
-                        self._search_space, models, datasets=datasets
+                        self._search_space, models, datasets=filtered_datasets
                     )
                 if callable(points_or_stateful):
                     acquisition_state, query_points = points_or_stateful(acquisition_state)
@@ -773,14 +774,23 @@ def optimize(
                         else {OBJECTIVE: observer_output}
                     )

-                datasets = acquisition_rule.update_datasets(datasets, tagged_output)
+                # See explanation in ask_tell_optimization.tell().
+                updated_datasets = {}
+                for tag, new_dataset in tagged_output.items():
+                    _, old_dataset = get_value_for_tag(
+                        datasets, [tag, LocalTag.from_tag(tag).global_tag]
+                    )
+                    assert old_dataset is not None
+                    updated_datasets[tag] = old_dataset + new_dataset
+                datasets = updated_datasets
+                filtered_datasets = acquisition_rule.filter_datasets(updated_datasets)

                 with Timer() as model_fitting_timer:
                     if fit_model:
                         for tag, model in models.items():
                             # Always use the matching dataset to the model. If the model is
                             # local, then the dataset should be too by this stage.
-                            dataset = datasets[tag]
+                            dataset = filtered_datasets[tag]
                             model.update(dataset)
                             model.optimize_and_save_result(dataset)

diff --git a/trieste/experimental/plotting/plotting.py b/trieste/experimental/plotting/plotting.py
index cb9745722a..7ce516aa0b 100644
--- a/trieste/experimental/plotting/plotting.py
+++ b/trieste/experimental/plotting/plotting.py
@@ -236,7 +236,7 @@ def batched_func(x: TensorType) -> TensorType:

 def format_point_markers(
     num_pts: int,
-    num_init: Optional[int] = None,
+    num_init: Optional[Union[int, TensorType]] = None,
     idx_best: Optional[TensorType] = None,
     mask_fail: Optional[TensorType] = None,
     m_init: str = "x",
@@ -249,7 +249,7 @@ def format_point_markers(
     Prepares point marker styles according to some BO factors.

     :param num_pts: total number of BO points
-    :param num_init: initial number of BO points
+    :param num_init: initial number of BO points; can also be a mask
     :param idx_best: index of the best BO point(s)
     :param mask_fail: Bool vector, True if the corresponding observation violates the constraint(s)
     :param m_init: marker for the initial BO points
@@ -264,7 +264,10 @@ def format_point_markers(
     col_pts = np.repeat(c_pass, num_pts)
     col_pts = col_pts.astype("<U15")
[...]
     if len(history.datasets) > 1:
         # Expect there to be an objective dataset for each subspace.
         datasets = [history.datasets[LocalTag(OBJECTIVE, i)] for i in range(len(spaces))]
+
         _new_points_mask = [
             np.zeros(dataset.query_points.shape[0], dtype=bool) for dataset in datasets
         ]
@@ -605,7 +607,21 @@ def plot_trust_region_history_2d(
             mask[-1] = True
         # Concatenate the masks.
         new_points_mask = np.concatenate(_new_points_mask)
+
+        if num_init is not None:
+            _num_init_mask = [
+                np.zeros(dataset.query_points.shape[0], dtype=bool) for dataset in datasets
+            ]
+            # First num_init points in each dataset are the init points.
+ for mask in _num_init_mask: + mask[:num_init] = True + # Concatenate the masks. + num_init = np.concatenate(_num_init_mask) + + # Get the overall query points. + query_points = np.concatenate([dataset.query_points for dataset in datasets]) else: + query_points = history.dataset.query_points # All query points. new_points_mask = np.zeros(query_points.shape[0], dtype=bool) new_points_mask[-num_query_points:] = True From 5b3ec0f1f3851dea1b27e9e957f290791821a0e9 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Thu, 16 Nov 2023 17:37:22 +0000 Subject: [PATCH 25/33] Make changes from PR feedback --- tests/unit/acquisition/test_rule.py | 28 ++++----- tests/unit/objectives/test_utils.py | 12 ++-- tests/unit/test_ask_tell_optimization.py | 6 +- tests/unit/test_bayesian_optimizer.py | 6 +- tests/unit/utils/test_misc.py | 32 +++++----- trieste/acquisition/rule.py | 37 ++++++------ trieste/acquisition/utils.py | 4 +- trieste/ask_tell_optimization.py | 18 +++--- trieste/bayesian_optimizer.py | 26 ++++---- trieste/experimental/plotting/plotting.py | 4 +- trieste/objectives/utils.py | 22 ++++--- trieste/utils/misc.py | 72 +++++------------------ 12 files changed, 114 insertions(+), 153 deletions(-) diff --git a/tests/unit/acquisition/test_rule.py b/tests/unit/acquisition/test_rule.py index d810116121..b184d31306 100644 --- a/tests/unit/acquisition/test_rule.py +++ b/tests/unit/acquisition/test_rule.py @@ -69,7 +69,7 @@ from trieste.observer import OBJECTIVE from trieste.space import Box, SearchSpace, TaggedMultiSearchSpace from trieste.types import State, Tag, TensorType -from trieste.utils.misc import LocalTag, get_value_for_tag +from trieste.utils.misc import LocalizedTag, get_value_for_tag def _line_search_maximize( @@ -795,10 +795,10 @@ def test_trego_always_uses_global_dataset() -> None: tf.constant([[0.5, -0.2], [0.7, 0.2], [1.1, 0.3], [0.5, 0.5]]), tf.constant([[0.7], [0.8], [0.9], [1.0]]), ) - updated_datasets = tr.filter_datasets({"OBJECTIVE__0": dataset + new_data}) + updated_datasets = tr.filter_datasets({LocalizedTag(OBJECTIVE, 0): dataset + new_data}) # Both the local and global datasets should match. - assert updated_datasets.keys() == {"OBJECTIVE", "OBJECTIVE__0"} + assert updated_datasets.keys() == {OBJECTIVE, LocalizedTag(OBJECTIVE, 0)} # Updated dataset should contain all the points, including ones outside the search space. exp_dataset = dataset + new_data for key in updated_datasets.keys(): @@ -1606,7 +1606,7 @@ def test_multi_trust_region_box_with_multiple_models_and_regions( tag = OBJECTIVE num_models = 1 else: - tag = LocalTag(OBJECTIVE, i) + tag = LocalizedTag(OBJECTIVE, i) num_models = num_regions num_regions_per_model = num_regions // num_models @@ -1689,18 +1689,18 @@ def prepare_acquisition_function( ( { OBJECTIVE: mk_dataset([[-1.0]], [[-1.0]]), # Should be ignored. - "OBJECTIVE__0": mk_dataset([[0.0]], [[1.0]]), - "OBJECTIVE__1": mk_dataset([[1.0]], [[1.0]]), - "OBJECTIVE__2": mk_dataset([[2.0]], [[1.0]]), + LocalizedTag(OBJECTIVE, 0): mk_dataset([[0.0]], [[1.0]]), + LocalizedTag(OBJECTIVE, 1): mk_dataset([[1.0]], [[1.0]]), + LocalizedTag(OBJECTIVE, 2): mk_dataset([[2.0]], [[1.0]]), }, 1, ), ( { OBJECTIVE: mk_dataset([[-1.0]], [[-1.0]]), # Should be ignored. 
- "OBJECTIVE__0": mk_dataset([[0.0], [1.0]], [[1.0], [1.0]]), - "OBJECTIVE__1": mk_dataset([[2.0], [1.0]], [[1.0], [1.0]]), - "OBJECTIVE__2": mk_dataset([[2.0], [3.0]], [[1.0], [1.0]]), + LocalizedTag(OBJECTIVE, 0): mk_dataset([[0.0], [1.0]], [[1.0], [1.0]]), + LocalizedTag(OBJECTIVE, 1): mk_dataset([[2.0], [1.0]], [[1.0], [1.0]]), + LocalizedTag(OBJECTIVE, 2): mk_dataset([[2.0], [3.0]], [[1.0], [1.0]]), }, 1, ), @@ -1730,7 +1730,7 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( updated_datasets = {} for tag in new_data: - _, dataset = get_value_for_tag(datasets, [tag, LocalTag.from_tag(tag).global_tag]) + _, dataset = get_value_for_tag(datasets, *[tag, LocalizedTag.from_tag(tag).global_tag]) assert dataset is not None updated_datasets[tag] = dataset + new_data[tag] datasets = rule.filter_datasets(updated_datasets) @@ -1738,10 +1738,10 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( # Check local datasets. for i, subspace in enumerate(subspaces): assert ( - datasets[LocalTag(OBJECTIVE, i)].query_points.shape[0] + datasets[LocalizedTag(OBJECTIVE, i)].query_points.shape[0] == exp_num_init_points + num_query_points_per_region ) - assert np.all(subspace.contains(datasets[LocalTag(OBJECTIVE, i)].query_points)) + assert np.all(subspace.contains(datasets[LocalizedTag(OBJECTIVE, i)].query_points)) # Check global dataset. assert datasets[OBJECTIVE].query_points.shape[0] == num_local_models * ( @@ -1752,7 +1752,7 @@ def test_multi_trust_region_box_updated_datasets_are_in_regions( assert any(subspace.contains(point) for subspace in subspaces) # Global dataset should be the concatenation of all local datasets. exp_query_points = tf.concat( - [datasets[LocalTag(OBJECTIVE, i)].query_points for i in range(num_local_models)], axis=0 + [datasets[LocalizedTag(OBJECTIVE, i)].query_points for i in range(num_local_models)], axis=0 ) npt.assert_array_almost_equal(datasets[OBJECTIVE].query_points, exp_query_points) diff --git a/tests/unit/objectives/test_utils.py b/tests/unit/objectives/test_utils.py index df25f1b034..1bc33dcca2 100644 --- a/tests/unit/objectives/test_utils.py +++ b/tests/unit/objectives/test_utils.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, Sequence, Union +from typing import Callable, Sequence, Set, Union import numpy.testing as npt import pytest @@ -21,7 +21,7 @@ from trieste.objectives.utils import mk_batch_observer, mk_multi_observer, mk_observer from trieste.observer import Observer from trieste.types import Tag, TensorType -from trieste.utils.misc import LocalTag +from trieste.utils.misc import LocalizedTag def test_mk_observer() -> None: @@ -83,9 +83,9 @@ def test_mk_batch_observer( assert isinstance(ys, dict) # Check keys. 
- exp_keys = set() + exp_keys: Set[Union[Tag, LocalizedTag]] = set() for key in keys: - exp_keys.update({LocalTag(key, i).tag for i in range(batch_size)}) + exp_keys.update({LocalizedTag(key, i) for i in range(batch_size)}) exp_keys.add(key) assert ys.keys() == exp_keys @@ -102,5 +102,5 @@ def test_mk_batch_observer( npt.assert_array_equal(ys[key].query_points, tf.reshape(x_, [-1, 1])) npt.assert_array_equal(ys[key].observations, tf.reshape(exp_o, [-1, 1])) for i in range(batch_size): - npt.assert_array_equal(ys[LocalTag(key, i)].query_points, x_[:, i]) - npt.assert_array_equal(ys[LocalTag(key, i)].observations, exp_o[:, i]) + npt.assert_array_equal(ys[LocalizedTag(key, i)].query_points, x_[:, i]) + npt.assert_array_equal(ys[LocalizedTag(key, i)].observations, exp_o[:, i]) diff --git a/tests/unit/test_ask_tell_optimization.py b/tests/unit/test_ask_tell_optimization.py index 3660f87171..04e97b0081 100644 --- a/tests/unit/test_ask_tell_optimization.py +++ b/tests/unit/test_ask_tell_optimization.py @@ -36,7 +36,7 @@ from trieste.observer import OBJECTIVE from trieste.space import Box from trieste.types import State, Tag, TensorType -from trieste.utils.misc import LocalTag +from trieste.utils.misc import LocalizedTag # tags TAG1: Tag = "1" @@ -451,7 +451,7 @@ def test_ask_tell_optimizer_creates_correct_datasets_for_rank3_points( init_data = {OBJECTIVE: mk_dataset([[0.5], [1.5]], [[0.25], [0.35]])} else: init_data = { - LocalTag(OBJECTIVE, i): mk_dataset([[0.5 + i], [1.5 + i]], [[0.25], [0.35]]) + LocalizedTag(OBJECTIVE, i): mk_dataset([[0.5 + i], [1.5 + i]], [[0.25], [0.35]]) for i in range(batch_size) } init_data[OBJECTIVE] = mk_dataset([[0.5], [1.5]], [[0.25], [0.35]]) @@ -484,7 +484,7 @@ def update(self, dataset: Dataset) -> None: if use_global_model: exp_qps = tf.concat([exp_init_qps, tf.reshape(query_points, [-1, 1])], 0) else: - index = LocalTag.from_tag(self._tag).local_index + index = LocalizedTag.from_tag(self._tag).local_index exp_qps = tf.concat([exp_init_qps, query_points[:, index]], 0) npt.assert_array_equal(exp_qps, dataset.query_points) diff --git a/tests/unit/test_bayesian_optimizer.py b/tests/unit/test_bayesian_optimizer.py index f50f479589..26d7bd610b 100644 --- a/tests/unit/test_bayesian_optimizer.py +++ b/tests/unit/test_bayesian_optimizer.py @@ -46,7 +46,7 @@ from trieste.space import Box, SearchSpace from trieste.types import State, Tag, TensorType from trieste.utils import Err, Ok -from trieste.utils.misc import LocalTag +from trieste.utils.misc import LocalizedTag # tags FOO: Tag = "foo" @@ -251,7 +251,7 @@ def test_bayesian_optimizer_creates_correct_datasets_for_rank3_points( init_data = {OBJECTIVE: mk_dataset([[0.5], [1.5]], [[0.25], [0.35]])} else: init_data = { - LocalTag(OBJECTIVE, i): mk_dataset([[0.5 + i], [1.5 + i]], [[0.25], [0.35]]) + LocalizedTag(OBJECTIVE, i): mk_dataset([[0.5 + i], [1.5 + i]], [[0.25], [0.35]]) for i in range(batch_size) } init_data[OBJECTIVE] = mk_dataset([[0.5], [1.5]], [[0.25], [0.35]]) @@ -284,7 +284,7 @@ def update(self, dataset: Dataset) -> None: if use_global_model: exp_qps = tf.concat([exp_init_qps, tf.reshape(query_points, [-1, 1])], 0) else: - index = LocalTag.from_tag(self._tag).local_index + index = LocalizedTag.from_tag(self._tag).local_index exp_qps = tf.concat([exp_init_qps, query_points[:, index]], 0) npt.assert_array_equal(exp_qps, dataset.query_points) diff --git a/tests/unit/utils/test_misc.py b/tests/unit/utils/test_misc.py index 1e0eaf1a75..528f7aabf6 100644 --- a/tests/unit/utils/test_misc.py +++ 
b/tests/unit/utils/test_misc.py @@ -26,7 +26,7 @@ from trieste.types import Tag, TensorType from trieste.utils.misc import ( Err, - LocalTag, + LocalizedTag, Ok, Timer, flatten_leading_dims, @@ -102,7 +102,7 @@ def test_get_value_for_tag_returns_none_if_mapping_is_none() -> None: def test_get_value_for_tag_raises_if_tag_not_in_mapping() -> None: - with pytest.raises(ValueError, match="none of the tags '.'baz'.' found in mapping"): + with pytest.raises(ValueError, match="none of the tags '.'baz',.' found in mapping"): get_value_for_tag({"foo": "bar"}, "baz") @@ -116,38 +116,36 @@ def test_get_value_for_tag_returns_value_for_specified_tag() -> None: def test_get_value_for_tag_returns_first_matching_tag() -> None: assert get_value_for_tag( - {"foo": "bar", OBJECTIVE: "baz", "qux": "quux", "bar": "baz"}, ["far", "qux", "foo"] + {"foo": "bar", OBJECTIVE: "baz", "qux": "quux", "bar": "baz"}, *["far", "qux", "foo"] ) == ("qux", "quux") @pytest.mark.parametrize("tag_name", ["test_tag_1", "test_tag_2"]) @pytest.mark.parametrize("tag_index", [0, 2, None]) -def test_local_tag_creation(tag_name: str, tag_index: Optional[int]) -> None: - tag = LocalTag(tag_name, tag_index) +def test_localized_tag_creation(tag_name: str, tag_index: Optional[int]) -> None: + tag = LocalizedTag(tag_name, tag_index) is_local = True if tag_index is not None else False - exp_tag = f"{tag_name}__{tag_index}" if is_local else tag_name + # Ensure a duplicate tag is equal. + tag2 = LocalizedTag(tag_name, tag_index) assert tag.is_local == is_local assert tag.global_tag == tag_name assert tag.local_index == tag_index - assert tag == exp_tag - assert tag.tag == exp_tag - assert str(tag) == exp_tag - assert repr(tag) == f"LocalTag({tag_name}, {tag_index})" - assert hash(tag) == hash(exp_tag) + assert tag == tag2 + assert hash(tag) == hash(tag2) + assert repr(tag) == f"LocalizedTag(global_tag='{tag_name}', local_index={tag_index})" @pytest.mark.parametrize( "tag, exp_tag", [ - ("test_tag_1", LocalTag("test_tag_1", None)), - ("test_tag__2", LocalTag("test_tag", 2)), - (LocalTag("test_tag_1", 3), LocalTag("test_tag_1", 3)), - (LocalTag("test_tag", None), LocalTag("test_tag", None)), + ("test_tag_1", LocalizedTag("test_tag_1", None)), + (LocalizedTag("test_tag_1", 3), LocalizedTag("test_tag_1", 3)), + (LocalizedTag("test_tag", None), LocalizedTag("test_tag", None)), ], ) -def test_local_tag_from_tag(tag: Union[Tag, LocalTag], exp_tag: LocalTag) -> None: - ltag = LocalTag.from_tag(tag) +def test_localized_tag_from_tag(tag: Union[Tag, LocalizedTag], exp_tag: LocalizedTag) -> None: + ltag = LocalizedTag.from_tag(tag) assert ltag.global_tag == exp_tag.global_tag assert ltag.local_index == exp_tag.local_index diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index c972b11688..de453c0daf 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -64,7 +64,7 @@ from ..observer import OBJECTIVE from ..space import Box, SearchSpace, TaggedMultiSearchSpace from ..types import State, Tag, TensorType -from ..utils.misc import LocalTag +from ..utils.misc import LocalizedTag from .function import ( BatchMonteCarloExpectedImprovement, ExpectedImprovement, @@ -1010,7 +1010,7 @@ def _get_tags(self, tags: Set[Tag]) -> Tuple[Set[Tag], Set[Tag]]: local_gtags = set() global_tags = set() for tag in tags: - ltag = LocalTag.from_tag(tag) + ltag = LocalizedTag.from_tag(tag) if not ltag.is_local: global_tags.add(tag) elif ltag.local_index == self.region_index: @@ -1035,7 +1035,9 @@ def select_models( elif self.region_index 
is None: # If no index, then return the global models. _models = { - tag: model for tag, model in models.items() if not LocalTag.from_tag(tag).is_local + tag: model + for tag, model in models.items() + if not LocalizedTag.from_tag(tag).is_local } else: # Prefer matching local model for each tag, otherwise select the global model. @@ -1043,7 +1045,7 @@ def select_models( _models = {} for tag in local_gtags: - ltag = LocalTag(tag, self.region_index) + ltag = LocalizedTag(tag, self.region_index) _models[ltag] = models[ltag] for tag in global_tags: _models[tag] = models[tag] @@ -1066,7 +1068,7 @@ def select_datasets( _datasets = { tag: dataset for tag, dataset in datasets.items() - if not LocalTag.from_tag(tag).is_local + if not LocalizedTag.from_tag(tag).is_local } else: # Prefer matching local dataset for each tag, otherwise select the global dataset. @@ -1074,7 +1076,7 @@ def select_datasets( _datasets = {} for tag in local_gtags: - ltag = LocalTag(tag, self.region_index) + ltag = LocalizedTag(tag, self.region_index) _datasets[ltag] = datasets[ltag] for tag in global_tags: _datasets[tag] = datasets[tag] @@ -1104,7 +1106,7 @@ def get_datasets_filter_mask( return { tag: self.contains(dataset.query_points) for tag, dataset in datasets.items() - if LocalTag.from_tag(tag).local_index == self.region_index + if LocalizedTag.from_tag(tag).local_index == self.region_index } @@ -1201,7 +1203,7 @@ def acquire( num_local_models: Dict[Tag, int] = defaultdict(int) for tag in models: - ltag = LocalTag.from_tag(tag) + ltag = LocalizedTag.from_tag(tag) if ltag.is_local: num_local_models[ltag.global_tag] += 1 num_local_models_vals = set(num_local_models.values()) @@ -1272,11 +1274,12 @@ def state_func( # Remap all local tags to global ones. One reason is that single model # acquisition builders expect OBJECTIVE to exist. _models = { - LocalTag.from_tag(tag).global_tag: model for tag, model in _models.items() + LocalizedTag.from_tag(tag).global_tag: model + for tag, model in _models.items() } if _datasets is not None: _datasets = { - LocalTag.from_tag(tag).global_tag: dataset + LocalizedTag.from_tag(tag).global_tag: dataset for tag, dataset in _datasets.items() } _points.append(rule.acquire(subspace, _models, _datasets)) @@ -1339,11 +1342,11 @@ def filter_datasets(self, datasets: Mapping[Tag, Dataset]) -> Mapping[Tag, Datas used_masks = { tag: tf.zeros(dataset.query_points.shape[:-1], dtype=tf.bool) for tag, dataset in datasets.items() - if LocalTag.from_tag(tag).is_local + if LocalizedTag.from_tag(tag).is_local } # Global datasets to re-generate. - global_tags = {LocalTag.from_tag(tag).global_tag for tag in used_masks} + global_tags = {LocalizedTag.from_tag(tag).global_tag for tag in used_masks} # Using init_subspaces here relies on the users not creating new subspaces after # initialization. This is a reasonable assumption for now. 
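The remap above is what lets single-model acquisition builders keep seeing the plain `OBJECTIVE` tag. A small sketch with hypothetical values:

    from trieste.observer import OBJECTIVE
    from trieste.utils.misc import LocalizedTag

    _models = {LocalizedTag(OBJECTIVE, 2): "model for region 2"}
    remapped = {LocalizedTag.from_tag(tag).global_tag: model for tag, model in _models.items()}
    print(remapped)  # {'OBJECTIVE': 'model for region 2'}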
@@ -1352,7 +1355,7 @@ def filter_datasets(self, datasets: Mapping[Tag, Dataset]) -> Mapping[Tag, Datas in_region_masks = subspace.get_datasets_filter_mask(datasets) if in_region_masks is not None: for tag, in_region in in_region_masks.items(): - ltag = LocalTag.from_tag(tag) + ltag = LocalizedTag.from_tag(tag) assert ltag.is_local, f"can only filter local tags, got {tag}" used_masks[tag] = tf.logical_or(used_masks[tag], in_region) @@ -1370,7 +1373,7 @@ def filter_datasets(self, datasets: Mapping[Tag, Dataset]) -> Mapping[Tag, Datas local_datasets = [ value for tag, value in filtered_datasets.items() - if LocalTag.from_tag(tag).global_tag == gtag + if LocalizedTag.from_tag(tag).global_tag == gtag ] # Note there is no ordering assumption for the local datasets. They are simply # concatenated and information about which local dataset they came from is lost. @@ -1491,7 +1494,7 @@ def get_dataset_min( if ( datasets is None or len(datasets) != 1 - or LocalTag.from_tag(next(iter(datasets))).global_tag != OBJECTIVE + or LocalizedTag.from_tag(next(iter(datasets))).global_tag != OBJECTIVE ): raise ValueError("""a single OBJECTIVE dataset must be provided""") dataset = next(iter(datasets.values())) @@ -1648,7 +1651,7 @@ def get_datasets_filter_mask( return { tag: tf.ones(tf.shape(dataset.query_points)[:-1], dtype=tf.bool) for tag, dataset in datasets.items() - if LocalTag.from_tag(tag).local_index == self.region_index + if LocalizedTag.from_tag(tag).local_index == self.region_index } @inherit_check_shapes @@ -1659,7 +1662,7 @@ def get_dataset_min( if ( datasets is None or len(datasets) != 1 - or LocalTag.from_tag(next(iter(datasets))).global_tag != OBJECTIVE + or LocalizedTag.from_tag(next(iter(datasets))).global_tag != OBJECTIVE ): raise ValueError("""a single OBJECTIVE dataset must be provided""") dataset = next(iter(datasets.values())) diff --git a/trieste/acquisition/utils.py b/trieste/acquisition/utils.py index dd28a1fa1c..3fac3a2d41 100644 --- a/trieste/acquisition/utils.py +++ b/trieste/acquisition/utils.py @@ -23,7 +23,7 @@ from ..observer import OBJECTIVE from ..space import SearchSpaceType from ..types import Tag, TensorType -from ..utils.misc import LocalTag +from ..utils.misc import LocalizedTag from .interface import AcquisitionFunction from .optimizer import AcquisitionOptimizer @@ -155,7 +155,7 @@ def copy_to_local_models( :param key: The tag prefix for the local models. :return: A mapping of the local models. """ - return {LocalTag(key, i).tag: copy.deepcopy(global_model) for i in range(num_local_models)} + return {LocalizedTag(key, i): copy.deepcopy(global_model) for i in range(num_local_models)} @check_shapes( diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py index b42227ff12..8f60076d2d 100644 --- a/trieste/ask_tell_optimization.py +++ b/trieste/ask_tell_optimization.py @@ -47,7 +47,7 @@ from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Ok, Timer -from .utils.misc import LocalTag, get_value_for_tag +from .utils.misc import LocalizedTag, get_value_for_tag StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -196,8 +196,8 @@ def __init__( models = cast(Dict[Tag, TrainableProbabilisticModelType], models) # Get set of dataset and model keys, ignoring any local tag index. 
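With this change `copy_to_local_models` keys its copies by `LocalizedTag` instead of a mangled string tag. A usage sketch, assuming `global_model` is some existing trainable model:

    from trieste.acquisition.utils import copy_to_local_models
    from trieste.observer import OBJECTIVE
    from trieste.utils.misc import LocalizedTag

    # global_model: assumed to be some existing TrainableProbabilisticModel
    local_models = copy_to_local_models(global_model, num_local_models=3)
    assert set(local_models) == {LocalizedTag(OBJECTIVE, i) for i in range(3)}
    assert all(m is not global_model for m in local_models.values())  # independent deep copies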
- datasets_keys = {LocalTag.from_tag(tag).global_tag for tag in datasets.keys()} - models_keys = {LocalTag.from_tag(tag).global_tag for tag in models.keys()} + datasets_keys = {LocalizedTag.from_tag(tag).global_tag for tag in datasets.keys()} + models_keys = {LocalizedTag.from_tag(tag).global_tag for tag in models.keys()} if datasets_keys != models_keys: raise ValueError( f"datasets and models should contain the same keys. Got {datasets_keys} and" @@ -237,8 +237,8 @@ def __init__( with Timer() as initial_model_fitting_timer: for tag, model in self._models.items(): # Prefer local dataset if available. - tags = [tag, LocalTag.from_tag(tag).global_tag] - _, dataset = get_value_for_tag(datasets, tags) + tags = [tag, LocalizedTag.from_tag(tag).global_tag] + _, dataset = get_value_for_tag(datasets, *tags) assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) @@ -266,7 +266,7 @@ def dataset(self) -> Dataset: """The current dataset when there is just one dataset.""" # Ignore local datasets. datasets: Mapping[Tag, Dataset] = dict( - filter(lambda item: not LocalTag.from_tag(item[0]).is_local, self.datasets.items()) + filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, self.datasets.items()) ) if len(datasets) == 1: return next(iter(datasets.values())) @@ -293,7 +293,7 @@ def model(self) -> TrainableProbabilisticModel: """The current model when there is just one model.""" # Ignore local models. models: Mapping[Tag, TrainableProbabilisticModel] = dict( - filter(lambda item: not LocalTag.from_tag(item[0]).is_local, self.models.items()) + filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, self.models.items()) ) if len(models) == 1: return next(iter(models.values())) @@ -440,7 +440,7 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None: # The datasets must have the same keys as the existing datasets. Only exception is if # the existing datasets are all global, in which case the dataset will be appropriately # updated below for the next iteration. - datasets_indices = {LocalTag.from_tag(tag).local_index for tag in self._datasets.keys()} + datasets_indices = {LocalizedTag.from_tag(tag).local_index for tag in self._datasets.keys()} if self._datasets.keys() != new_data.keys() and datasets_indices != {None}: raise ValueError( f"new_data keys {new_data.keys()} doesn't " @@ -463,7 +463,7 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None: updated_datasets = {} for tag, new_dataset in new_data.items(): _, old_dataset = get_value_for_tag( - self._datasets, [tag, LocalTag.from_tag(tag).global_tag] + self._datasets, *[tag, LocalizedTag.from_tag(tag).global_tag] ) assert old_dataset is not None updated_datasets[tag] = old_dataset + new_dataset diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index f9d2f232df..ee3eed54ee 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -62,7 +62,7 @@ from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Err, Ok, Result, Timer -from .utils.misc import LocalTag, get_value_for_tag +from .utils.misc import LocalizedTag, get_value_for_tag StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -100,7 +100,7 @@ def dataset(self) -> Dataset: """The dataset when there is just one dataset.""" # Ignore local datasets. 
datasets: Mapping[Tag, Dataset] = dict( - filter(lambda item: not LocalTag.from_tag(item[0]).is_local, self.datasets.items()) + filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, self.datasets.items()) ) if len(datasets) == 1: return next(iter(datasets.values())) @@ -112,7 +112,7 @@ def model(self) -> TrainableProbabilisticModel: """The model when there is just one dataset.""" # Ignore local models. models: Mapping[Tag, TrainableProbabilisticModel] = dict( - filter(lambda item: not LocalTag.from_tag(item[0]).is_local, self.models.items()) + filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, self.models.items()) ) if len(models) == 1: return next(iter(models.values())) @@ -238,7 +238,7 @@ def try_get_final_dataset(self) -> Dataset: datasets = self.try_get_final_datasets() # Ignore local datasets. datasets = dict( - filter(lambda item: not LocalTag.from_tag(item[0]).is_local, datasets.items()) + filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, datasets.items()) ) if len(datasets) == 1: return next(iter(datasets.values())) @@ -284,7 +284,9 @@ def try_get_final_model(self) -> TrainableProbabilisticModel: """ models = self.try_get_final_models() # Ignore local models. - models = dict(filter(lambda item: not LocalTag.from_tag(item[0]).is_local, models.items())) + models = dict( + filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, models.items()) + ) if len(models) == 1: return next(iter(models.values())) else: @@ -655,8 +657,8 @@ def optimize( raise ValueError(f"num_steps must be at least 0, got {num_steps}") # Get set of dataset and model keys, ignoring any local tag index. - datasets_keys = {LocalTag.from_tag(tag).global_tag for tag in datasets.keys()} - models_keys = {LocalTag.from_tag(tag).global_tag for tag in models.keys()} + datasets_keys = {LocalizedTag.from_tag(tag).global_tag for tag in datasets.keys()} + models_keys = {LocalizedTag.from_tag(tag).global_tag for tag in models.keys()} if datasets_keys != models_keys: raise ValueError( f"datasets and models should contain the same keys. Got {datasets_keys} and" @@ -739,8 +741,8 @@ def optimize( with Timer() as initial_model_fitting_timer: for tag, model in models.items(): # Prefer local dataset if available. - tags = [tag, LocalTag.from_tag(tag).global_tag] - _, dataset = get_value_for_tag(datasets, tags) + tags = [tag, LocalizedTag.from_tag(tag).global_tag] + _, dataset = get_value_for_tag(datasets, *tags) assert dataset is not None model.update(dataset) model.optimize_and_save_result(dataset) @@ -778,7 +780,7 @@ def optimize( updated_datasets = {} for tag, new_dataset in tagged_output.items(): _, old_dataset = get_value_for_tag( - datasets, [tag, LocalTag.from_tag(tag).global_tag] + datasets, *[tag, LocalizedTag.from_tag(tag).global_tag] ) assert old_dataset is not None updated_datasets[tag] = old_dataset + new_dataset @@ -922,8 +924,8 @@ def write_summary_initial_model_fit( for tag, model in models.items(): with tf.name_scope(f"{tag}.model"): # Prefer local dataset if available. 
- tags = [tag, LocalTag.from_tag(tag).global_tag] - _, dataset = get_value_for_tag(datasets, tags) + tags = [tag, LocalizedTag.from_tag(tag).global_tag] + _, dataset = get_value_for_tag(datasets, *tags) assert dataset is not None model.log(dataset) logging.scalar( diff --git a/trieste/experimental/plotting/plotting.py b/trieste/experimental/plotting/plotting.py index 7ce516aa0b..92183e503a 100644 --- a/trieste/experimental/plotting/plotting.py +++ b/trieste/experimental/plotting/plotting.py @@ -37,7 +37,7 @@ from trieste.space import TaggedMultiSearchSpace from trieste.types import TensorType from trieste.utils import to_numpy -from trieste.utils.misc import LocalTag +from trieste.utils.misc import LocalizedTag def create_grid( @@ -597,7 +597,7 @@ def plot_trust_region_history_2d( # Otherwise, use the global dataset and assume the last `num_query_points` points are new. if len(history.datasets) > 1: # Expect there to be an objective dataset for each subspace. - datasets = [history.datasets[LocalTag(OBJECTIVE, i)] for i in range(len(spaces))] + datasets = [history.datasets[LocalizedTag(OBJECTIVE, i)] for i in range(len(spaces))] _new_points_mask = [ np.zeros(dataset.query_points.shape[0], dtype=bool) for dataset in datasets diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index 014b66feac..d765feb13f 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -28,7 +28,7 @@ from ..data import Dataset from ..observer import OBJECTIVE, MultiObserver, Observer, SingleObserver from ..types import Tag, TensorType -from ..utils.misc import LocalTag +from ..utils.misc import LocalizedTag @overload @@ -83,12 +83,12 @@ def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: # Call objective with rank 2 query points by flattening batch dimension. # Some objectives might only expect rank 2 query points, so this is safer. batch_size = qps.shape[1] - qps = tf.reshape(qps, [-1, qps.shape[-1]]) - obs_or_dataset = objective_or_observer(qps) + flat_qps = tf.reshape(qps, [-1, qps.shape[-1]]) + obs_or_dataset = objective_or_observer(flat_qps) if not isinstance(obs_or_dataset, (Mapping, Dataset)): # Just a single observation, so wrap in a dataset. - obs_or_dataset = Dataset(qps, obs_or_dataset) + obs_or_dataset = Dataset(flat_qps, obs_or_dataset) if isinstance(obs_or_dataset, Dataset): # Convert to a mapping with a default key. @@ -97,14 +97,12 @@ def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: datasets = {} for key, dataset in obs_or_dataset.items(): # Include overall dataset and per batch dataset. 
- obs = dataset.observations - qps = tf.reshape(qps, [-1, batch_size, qps.shape[-1]]) - obs = tf.reshape(obs, [-1, batch_size, obs.shape[-1]]) - _datasets = { - key: dataset, - **{LocalTag(key, i): Dataset(qps[:, i], obs[:, i]) for i in range(batch_size)}, - } - datasets.update(_datasets) + flat_obs = dataset.observations + qps = tf.reshape(flat_qps, [-1, batch_size, flat_qps.shape[-1]]) + obs = tf.reshape(flat_obs, [-1, batch_size, flat_obs.shape[-1]]) + datasets[key] = dataset + for i in range(batch_size): + datasets[LocalizedTag(key, i)] = Dataset(qps[:, i], obs[:, i]) return datasets diff --git a/trieste/utils/misc.py b/trieste/utils/misc.py index 04a16cfad5..7fb0e4b6ce 100644 --- a/trieste/utils/misc.py +++ b/trieste/utils/misc.py @@ -17,19 +17,7 @@ from dataclasses import dataclass from time import perf_counter from types import TracebackType -from typing import ( - Any, - Callable, - Generic, - Mapping, - NoReturn, - Optional, - Sequence, - Tuple, - Type, - TypeVar, - Union, -) +from typing import Any, Callable, Generic, Mapping, NoReturn, Optional, Tuple, Type, TypeVar, Union import numpy as np import tensorflow as tf @@ -234,7 +222,7 @@ def map_values(f: Callable[[U], V], mapping: Mapping[K, U]) -> Mapping[K, V]: def get_value_for_tag( - mapping: Optional[Mapping[Tag, T]], tags: Union[Tag, Sequence[Tag]] = OBJECTIVE + mapping: Optional[Mapping[Tag, T]], *tags: Tag ) -> Tuple[Optional[Tag], Optional[T]]: """Return the value from a mapping for the first tag found from a sequence of tags. @@ -245,25 +233,27 @@ def get_value_for_tag( :raises ValueError: If none of the tags are in the mapping and the mapping is not None. """ - if isinstance(tags, Tag): - tags = [tags] + if not tags: + tags = (OBJECTIVE,) if mapping is None: return None, None else: - matched_tags = sorted(set(tags) & set(mapping.keys()), key=tags.index) - if matched_tags: - return matched_tags[0], mapping[matched_tags[0]] - else: + matched_tag = next((tag for tag in tags if tag in mapping), None) + if matched_tag is None: raise ValueError(f"none of the tags '{tags}' found in mapping") + return matched_tag, mapping[matched_tag] @dataclass(frozen=True) -class LocalTag: - """Manage a tag for a local model or dataset.""" +class LocalizedTag: + """Manage a tag for a local model or dataset. These have a global tag and a local index.""" global_tag: Tag + """ The global portion of the tag. """ + local_index: Optional[int] + """ The local index of the tag. 
""" def __post_init__(self) -> None: if self.local_index is not None and self.local_index < 0: @@ -274,43 +264,13 @@ def is_local(self) -> bool: """Return True if the tag is a local tag.""" return self.local_index is not None - @property - def tag(self) -> Tag: - """The local tag.""" - if self.is_local: - return f"{self.global_tag}__{self.local_index}" - else: - return self.global_tag - - def __repr__(self) -> str: - """Return the local tag.""" - return f"LocalTag({self.global_tag}, {self.local_index})" - - def __str__(self) -> str: - """Return the local tag.""" - return str(self.tag) - - def __hash__(self) -> int: - """Return the hash of the overall tag.""" - return hash(self.tag) - - def __eq__(self, other: object) -> bool: - """Return True if the local tag is equal to the other object.""" - return hash(self) == hash(other) - @staticmethod - def from_tag(tag: Union[Tag, LocalTag]) -> LocalTag: - """Return a LocalTag from a given tag.""" - if isinstance(tag, LocalTag): + def from_tag(tag: Union[Tag, LocalizedTag]) -> LocalizedTag: + """Return a LocalizedTag from a given tag.""" + if isinstance(tag, LocalizedTag): return tag else: - tag = str(tag) - if "__" in tag: - global_tag, _local_index = tag.split("__") - local_index = int(_local_index) - else: - global_tag, local_index = tag, None - return LocalTag(global_tag, local_index) + return LocalizedTag(tag, None) class Timer: From e5cccc89d785ef9822fd0494c2f59a7804f273e8 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Thu, 23 Nov 2023 16:42:34 +0000 Subject: [PATCH 26/33] Address some of the recent feedback --- docs/notebooks/trust_region.pct.py | 38 -------------------- tests/unit/acquisition/test_utils.py | 17 ++++++++- tests/unit/test_ask_tell_optimization.py | 5 +-- tests/unit/test_bayesian_optimizer.py | 3 +- trieste/ask_tell_optimization.py | 35 ++++++++++--------- trieste/bayesian_optimizer.py | 44 +++++++++++------------- trieste/utils/misc.py | 13 ++++++- 7 files changed, 72 insertions(+), 83 deletions(-) diff --git a/docs/notebooks/trust_region.pct.py b/docs/notebooks/trust_region.pct.py index 140f9f28d4..7095fb3037 100644 --- a/docs/notebooks/trust_region.pct.py +++ b/docs/notebooks/trust_region.pct.py @@ -245,44 +245,6 @@ def plot_history(result: trieste.bayesian_optimizer.OptimizationResult) -> None: # %% plot_history(result) -# %% [markdown] -# ## TEST - -# %% -num_query_points = 5 - -init_subspaces = [ - trieste.acquisition.rule.SingleObjectiveTrustRegionBox(search_space) - for _ in range(num_query_points) -] -base_rule = trieste.acquisition.rule.EfficientGlobalOptimization( - builder=trieste.acquisition.ParallelContinuousThompsonSampling(), - num_query_points=1, -) -batch_acq_rule = trieste.acquisition.rule.BatchTrustRegionBox( - init_subspaces, base_rule -) - -bo = trieste.bayesian_optimizer.BayesianOptimizer(observer, search_space) - -num_steps = 5 -result = bo.optimize( - num_steps, - {trieste.observer.OBJECTIVE: initial_data}, - trieste.acquisition.utils.copy_to_local_models( - build_model(), num_query_points - ), - batch_acq_rule, - track_state=True, -) -dataset = result.try_get_final_dataset() - -# %% -plot_final_result(dataset) - -# %% -plot_history(result) - # %% [markdown] # ## Trust region `TurBO` acquisition rule # diff --git a/tests/unit/acquisition/test_utils.py b/tests/unit/acquisition/test_utils.py index 7975cab3a0..9e578587ca 100644 --- a/tests/unit/acquisition/test_utils.py +++ b/tests/unit/acquisition/test_utils.py @@ -13,7 +13,7 @@ # limitations under the License. 
from __future__ import annotations -from typing import Any +from typing import Any, Optional from unittest.mock import MagicMock import numpy as np @@ -22,6 +22,7 @@ from trieste.acquisition import AcquisitionFunction from trieste.acquisition.utils import ( + copy_to_local_models, get_local_dataset, get_unique_points_mask, select_nth_output, @@ -29,6 +30,8 @@ ) from trieste.data import Dataset from trieste.space import Box, SearchSpaceType +from trieste.types import Tag +from trieste.utils.misc import LocalizedTag @pytest.mark.parametrize( @@ -100,6 +103,18 @@ def test_get_local_dataset_works() -> None: assert tf.shape(get_local_dataset(search_space_2, combined).query_points)[0] == 20 +@pytest.mark.parametrize("num_local_models", [1, 3]) +@pytest.mark.parametrize("key", [None, "a"]) +def test_copy_to_local_models(num_local_models: int, key: Optional[Tag]) -> None: + global_model = MagicMock() + local_models = copy_to_local_models(global_model, num_local_models=num_local_models, key=key) + assert len(local_models) == num_local_models + for i, (k, m) in enumerate(local_models.items()): + assert k == LocalizedTag(key, i) + assert isinstance(m, MagicMock) + assert m is not global_model + + @pytest.mark.parametrize( "points, tolerance, expected_mask", [ diff --git a/tests/unit/test_ask_tell_optimization.py b/tests/unit/test_ask_tell_optimization.py index 04e97b0081..e5a25b2b20 100644 --- a/tests/unit/test_ask_tell_optimization.py +++ b/tests/unit/test_ask_tell_optimization.py @@ -13,6 +13,7 @@ # limitations under the License. from __future__ import annotations +import copy from typing import Mapping, Optional import numpy.testing as npt @@ -180,7 +181,7 @@ def test_ask_tell_optimizer_copies_state( ask_tell.tell(new_data) state_end: Record[None] = ask_tell.to_record(copy=copy) - assert_datasets_allclose(state_start.dataset, init_dataset) + assert_datasets_allclose(state_start.dataset, init_dataset if copy else init_dataset + new_data) assert_datasets_allclose(state_end.dataset, init_dataset + new_data) assert state_start.model is not model if copy else state_start.model is model @@ -502,7 +503,7 @@ def update(self, dataset: Dataset) -> None: observer = mk_batch_observer(lambda x: Dataset(x, x)) rule = FixedAcquisitionRule(query_points) - ask_tell = AskTellOptimizer(search_space, init_data, models, rule) + ask_tell = AskTellOptimizer(search_space, copy.deepcopy(init_data), models, rule) points = ask_tell.ask() new_data = observer(points) diff --git a/tests/unit/test_bayesian_optimizer.py b/tests/unit/test_bayesian_optimizer.py index 26d7bd610b..c8beaa7269 100644 --- a/tests/unit/test_bayesian_optimizer.py +++ b/tests/unit/test_bayesian_optimizer.py @@ -13,6 +13,7 @@ # limitations under the License. 
from __future__ import annotations +import copy import tempfile from collections.abc import Mapping from pathlib import Path @@ -302,7 +303,7 @@ def update(self, dataset: Dataset) -> None: optimizer = BayesianOptimizer(lambda x: Dataset(x, x), search_space) rule = FixedAcquisitionRule(query_points) - optimizer.optimize(1, init_data, models, rule).final_result.unwrap() + optimizer.optimize(1, copy.deepcopy(init_data), models, rule).final_result.unwrap() @pytest.mark.parametrize("mode", ["early", "fail", "full"]) diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py index 8f60076d2d..aa809f3abd 100644 --- a/trieste/ask_tell_optimization.py +++ b/trieste/ask_tell_optimization.py @@ -47,7 +47,7 @@ from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Ok, Timer -from .utils.misc import LocalizedTag, get_value_for_tag +from .utils.misc import LocalizedTag, get_value_for_tag, ignoring_local_tags StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -195,7 +195,8 @@ def __init__( # reassure the type checker that everything is tagged models = cast(Dict[Tag, TrainableProbabilisticModelType], models) - # Get set of dataset and model keys, ignoring any local tag index. + # Get set of dataset and model keys, ignoring any local tag index. That is, only the + # global tag part is considered. datasets_keys = {LocalizedTag.from_tag(tag).global_tag for tag in datasets.keys()} models_keys = {LocalizedTag.from_tag(tag).global_tag for tag in models.keys()} if datasets_keys != models_keys: @@ -265,9 +266,7 @@ def datasets(self) -> Mapping[Tag, Dataset]: def dataset(self) -> Dataset: """The current dataset when there is just one dataset.""" # Ignore local datasets. - datasets: Mapping[Tag, Dataset] = dict( - filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, self.datasets.items()) - ) + datasets: Mapping[Tag, Dataset] = ignoring_local_tags(self.datasets) if len(datasets) == 1: return next(iter(datasets.values())) else: @@ -292,9 +291,7 @@ def models(self, models: Mapping[Tag, TrainableProbabilisticModelType]) -> None: def model(self) -> TrainableProbabilisticModel: """The current model when there is just one model.""" # Ignore local models. - models: Mapping[Tag, TrainableProbabilisticModel] = dict( - filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, self.models.items()) - ) + models: Mapping[Tag, TrainableProbabilisticModel] = ignoring_local_tags(self.models) if len(models) == 1: return next(iter(models.values())) else: @@ -460,15 +457,19 @@ def tell(self, new_data: Mapping[Tag, Dataset] | Dataset) -> None: # (in BatchTrustRegion) would otherwise remove duplicates. The main way to avoid the issue # in this scenario is to provide local initial datasets, instead of a global initial # dataset. - updated_datasets = {} - for tag, new_dataset in new_data.items(): - _, old_dataset = get_value_for_tag( - self._datasets, *[tag, LocalizedTag.from_tag(tag).global_tag] - ) - assert old_dataset is not None - updated_datasets[tag] = old_dataset + new_dataset - self._datasets = updated_datasets - self._filtered_datasets = self._acquisition_rule.filter_datasets(updated_datasets) + sorted_tags = sorted( # We need to process the local tags first, then the global tags. 
+ new_data, key=lambda tag: not LocalizedTag.from_tag(tag).is_local + ) + for tag in sorted_tags: + new_dataset = new_data[tag] + if tag in self._datasets: + self._datasets[tag] += new_dataset + else: + global_tag = LocalizedTag.from_tag(tag).global_tag + if global_tag not in self._datasets: + raise ValueError(f"global tag '{global_tag}' not found in dataset") + self._datasets[tag] = self._datasets[global_tag] + new_dataset + self._filtered_datasets = self._acquisition_rule.filter_datasets(self._datasets) with Timer() as model_fitting_timer: for tag, model in self._models.items(): diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py index ee3eed54ee..6f8e2796f3 100644 --- a/trieste/bayesian_optimizer.py +++ b/trieste/bayesian_optimizer.py @@ -62,7 +62,7 @@ from .space import SearchSpace from .types import State, Tag, TensorType from .utils import Err, Ok, Result, Timer -from .utils.misc import LocalizedTag, get_value_for_tag +from .utils.misc import LocalizedTag, get_value_for_tag, ignoring_local_tags StateType = TypeVar("StateType") """ Unbound type variable. """ @@ -99,9 +99,7 @@ class Record(Generic[StateType]): def dataset(self) -> Dataset: """The dataset when there is just one dataset.""" # Ignore local datasets. - datasets: Mapping[Tag, Dataset] = dict( - filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, self.datasets.items()) - ) + datasets: Mapping[Tag, Dataset] = ignoring_local_tags(self.datasets) if len(datasets) == 1: return next(iter(datasets.values())) else: @@ -111,9 +109,7 @@ def dataset(self) -> Dataset: def model(self) -> TrainableProbabilisticModel: """The model when there is just one dataset.""" # Ignore local models. - models: Mapping[Tag, TrainableProbabilisticModel] = dict( - filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, self.models.items()) - ) + models: Mapping[Tag, TrainableProbabilisticModel] = ignoring_local_tags(self.models) if len(models) == 1: return next(iter(models.values())) else: @@ -237,9 +233,7 @@ def try_get_final_dataset(self) -> Dataset: """ datasets = self.try_get_final_datasets() # Ignore local datasets. - datasets = dict( - filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, datasets.items()) - ) + datasets = ignoring_local_tags(datasets) if len(datasets) == 1: return next(iter(datasets.values())) else: @@ -284,9 +278,7 @@ def try_get_final_model(self) -> TrainableProbabilisticModel: """ models = self.try_get_final_models() # Ignore local models. - models = dict( - filter(lambda item: not LocalizedTag.from_tag(item[0]).is_local, models.items()) - ) + models = ignoring_local_tags(models) if len(models) == 1: return next(iter(models.values())) else: @@ -656,7 +648,8 @@ def optimize( if num_steps < 0: raise ValueError(f"num_steps must be at least 0, got {num_steps}") - # Get set of dataset and model keys, ignoring any local tag index. + # Get set of dataset and model keys, ignoring any local tag index. That is, only the + # global tag part is considered. datasets_keys = {LocalizedTag.from_tag(tag).global_tag for tag in datasets.keys()} models_keys = {LocalizedTag.from_tag(tag).global_tag for tag in models.keys()} if datasets_keys != models_keys: @@ -777,15 +770,20 @@ def optimize( ) # See explanation in ask_tell_optimization.tell(). 
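The local-first ordering matters here: a local tag that is missing from `datasets` is seeded from its global entry, so the global entry must still be unextended when the local tags are processed. The sort key simply exploits `False < True`; a sketch:

    from trieste.observer import OBJECTIVE
    from trieste.utils.misc import LocalizedTag

    tags = [OBJECTIVE, LocalizedTag(OBJECTIVE, 0), LocalizedTag(OBJECTIVE, 1)]
    sorted_tags = sorted(tags, key=lambda tag: not LocalizedTag.from_tag(tag).is_local)
    print(sorted_tags[0], sorted_tags[-1])  # local tags first, OBJECTIVE last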
- updated_datasets = {} - for tag, new_dataset in tagged_output.items(): - _, old_dataset = get_value_for_tag( - datasets, *[tag, LocalizedTag.from_tag(tag).global_tag] - ) - assert old_dataset is not None - updated_datasets[tag] = old_dataset + new_dataset - datasets = updated_datasets - filtered_datasets = acquisition_rule.filter_datasets(updated_datasets) + # We need to process the local tags first, then the global tags. + sorted_tags = sorted( + tagged_output, key=lambda tag: not LocalizedTag.from_tag(tag).is_local + ) + for tag in sorted_tags: + new_dataset = tagged_output[tag] + if tag in datasets: + datasets[tag] += new_dataset + else: + global_tag = LocalizedTag.from_tag(tag).global_tag + if global_tag not in datasets: + raise ValueError(f"global tag '{global_tag}' not found in dataset") + datasets[tag] = datasets[global_tag] + new_dataset + filtered_datasets = acquisition_rule.filter_datasets(datasets) with Timer() as model_fitting_timer: if fit_model: diff --git a/trieste/utils/misc.py b/trieste/utils/misc.py index 7fb0e4b6ce..34f76ae8da 100644 --- a/trieste/utils/misc.py +++ b/trieste/utils/misc.py @@ -227,7 +227,8 @@ def get_value_for_tag( """Return the value from a mapping for the first tag found from a sequence of tags. :param mapping: A mapping from tags to values. - :param tags: A tag or a sequence of tags. Sequence is searched in order. + :param tags: A tag or a sequence of tags. Sequence is searched in order. If no tags are + provided, the default tag OBJECTIVE is used. :return: The chosen tag and value of the tag in the mapping, or None for each if the mapping is None. :raises ValueError: If none of the tags are in the mapping and the mapping is not None. @@ -273,6 +274,16 @@ def from_tag(tag: Union[Tag, LocalizedTag]) -> LocalizedTag: return LocalizedTag(tag, None) +def ignoring_local_tags(mapping: Mapping[Tag, T]) -> Mapping[Tag, T]: + """ + Filter out local tags from a mapping, returning a new mapping with only global tags. + + :param mapping: A mapping from tags to values. + :return: A new mapping with only global tags. + """ + return {k: v for k, v in mapping.items() if not LocalizedTag.from_tag(k).is_local} + + class Timer: """ Functionality for timing chunks of code. For example: From 25da01b5b5056103f77cf83a79b70766e9d73546 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Thu, 23 Nov 2023 16:55:19 +0000 Subject: [PATCH 27/33] Fix dataset mypy error --- trieste/ask_tell_optimization.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py index aa809f3abd..e4ed1bc0b8 100644 --- a/trieste/ask_tell_optimization.py +++ b/trieste/ask_tell_optimization.py @@ -192,7 +192,10 @@ def __init__( if not isinstance(models, Mapping): models = {OBJECTIVE: models} + self._filtered_datasets = datasets + # reassure the type checker that everything is tagged + datasets = cast(Dict[Tag, Dataset], datasets) models = cast(Dict[Tag, TrainableProbabilisticModelType], models) # Get set of dataset and model keys, ignoring any local tag index. 
That is, only the
@@ -206,7 +209,6 @@ def __init__(
         )
 
         self._datasets = datasets
-        self._filtered_datasets = datasets
         self._models = models
 
         self._query_plot_dfs: dict[int, pd.DataFrame] = {}

From 292faaafcdd9dc2436b9a37c1aa4e9649672204a Mon Sep 17 00:00:00 2001
From: Khurram Ghani
Date: Fri, 24 Nov 2023 10:46:37 +0000
Subject: [PATCH 28/33] Copy dataset in optimizers to avoid changing it

---
 tests/unit/test_ask_tell_optimization.py | 3 +--
 tests/unit/test_bayesian_optimizer.py | 3 +--
 trieste/ask_tell_optimization.py | 3 +++
 trieste/bayesian_optimizer.py | 3 +++
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/unit/test_ask_tell_optimization.py b/tests/unit/test_ask_tell_optimization.py
index e5a25b2b20..b696d37c1b 100644
--- a/tests/unit/test_ask_tell_optimization.py
+++ b/tests/unit/test_ask_tell_optimization.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 from __future__ import annotations

-import copy
 from typing import Mapping, Optional

 import numpy.testing as npt
@@ -503,7 +502,7 @@ def update(self, dataset: Dataset) -> None:
     observer = mk_batch_observer(lambda x: Dataset(x, x))
     rule = FixedAcquisitionRule(query_points)

-    ask_tell = AskTellOptimizer(search_space, copy.deepcopy(init_data), models, rule)
+    ask_tell = AskTellOptimizer(search_space, init_data, models, rule)
     points = ask_tell.ask()
     new_data = observer(points)

diff --git a/tests/unit/test_bayesian_optimizer.py b/tests/unit/test_bayesian_optimizer.py
index c8beaa7269..26d7bd610b 100644
--- a/tests/unit/test_bayesian_optimizer.py
+++ b/tests/unit/test_bayesian_optimizer.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 from __future__ import annotations

-import copy
 import tempfile
 from collections.abc import Mapping
 from pathlib import Path
@@ -303,7 +302,7 @@ def update(self, dataset: Dataset) -> None:
     optimizer = BayesianOptimizer(lambda x: Dataset(x, x), search_space)
     rule = FixedAcquisitionRule(query_points)

-    optimizer.optimize(1, copy.deepcopy(init_data), models, rule).final_result.unwrap()
+    optimizer.optimize(1, init_data, models, rule).final_result.unwrap()


 @pytest.mark.parametrize("mode", ["early", "fail", "full"])
diff --git a/trieste/ask_tell_optimization.py b/trieste/ask_tell_optimization.py
index e4ed1bc0b8..4068ef97c4 100644
--- a/trieste/ask_tell_optimization.py
+++ b/trieste/ask_tell_optimization.py
@@ -187,6 +187,9 @@ def __init__(
         if not datasets or not models:
             raise ValueError("dicts of datasets and models must be populated.")

+        # Copy the dataset so we don't change the one provided by the user.
+        datasets = deepcopy(datasets)
+
         if isinstance(datasets, Dataset):
             datasets = {OBJECTIVE: datasets}
         if not isinstance(models, Mapping):
diff --git a/trieste/bayesian_optimizer.py b/trieste/bayesian_optimizer.py
index 6f8e2796f3..675c268aba 100644
--- a/trieste/bayesian_optimizer.py
+++ b/trieste/bayesian_optimizer.py
@@ -635,6 +635,9 @@ def optimize(
             - ``datasets`` or ``models`` are empty
             - the default `acquisition_rule` is used and the tags are not `OBJECTIVE`.
         """
+        # Copy the dataset so we don't change the one provided by the user.
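+        # (The per-step updates below rebind entries of this mapping in place, so
+        # operating on a copy keeps the caller's mapping unchanged across steps.)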
+ datasets = copy.deepcopy(datasets) + if isinstance(datasets, Dataset): datasets = {OBJECTIVE: datasets} if not isinstance(models, Mapping): From 9170c64ad34fe94fbf526711baf9078d40622ade Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Fri, 24 Nov 2023 11:42:19 +0000 Subject: [PATCH 29/33] Share DatasetChecker and tidy-up exp values in tests --- tests/unit/objectives/test_utils.py | 28 +++++----- tests/unit/test_ask_tell_optimization.py | 71 ++++++++++++++---------- tests/unit/test_bayesian_optimizer.py | 32 +---------- 3 files changed, 56 insertions(+), 75 deletions(-) diff --git a/tests/unit/objectives/test_utils.py b/tests/unit/objectives/test_utils.py index 1bc33dcca2..d1832b9443 100644 --- a/tests/unit/objectives/test_utils.py +++ b/tests/unit/objectives/test_utils.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, Sequence, Set, Union +from typing import Callable, Mapping, Set, Union import numpy.testing as npt import pytest @@ -59,18 +59,21 @@ def test_mk_multi_observer() -> None: @pytest.mark.parametrize( - "input_objective, keys", + "input_objective, exp_o_call", [ - (lambda x: x, ["baz"]), - (lambda x: Dataset(x, x), ["baz"]), - (mk_multi_observer(foo=lambda x: x + 1, bar=lambda x: x - 1), ["foo", "bar"]), + (lambda x: x, {"baz": lambda x: x}), + (lambda x: Dataset(x, x), {"baz": lambda x: x}), + ( + mk_multi_observer(foo=lambda x: x + 1, bar=lambda x: x - 1), + {"foo": lambda x: x + 1, "bar": lambda x: x - 1}, + ), ], ) @pytest.mark.parametrize("batch_size", [1, 2, 3]) @pytest.mark.parametrize("num_query_points_per_batch", [1, 2]) def test_mk_batch_observer( input_objective: Union[Callable[[TensorType], TensorType], Observer], - keys: Sequence[Tag], + exp_o_call: Mapping[Tag, Callable[[TensorType], TensorType]], batch_size: int, num_query_points_per_batch: int, ) -> None: @@ -84,20 +87,15 @@ def test_mk_batch_observer( # Check keys. exp_keys: Set[Union[Tag, LocalizedTag]] = set() - for key in keys: + for key in exp_o_call: exp_keys.update({LocalizedTag(key, i) for i in range(batch_size)}) exp_keys.add(key) assert ys.keys() == exp_keys # Check datasets. - for key in keys: - # Different observers (in parameterize above) return different observation values. - if key == "foo": - exp_o = x_ + 1 - elif key == "bar": - exp_o = x_ - 1 - else: - exp_o = x_ + for key, call in exp_o_call.items(): + # Get expected observations. 
+ exp_o = call(x_) npt.assert_array_equal(ys[key].query_points, tf.reshape(x_, [-1, 1])) npt.assert_array_equal(ys[key].observations, tf.reshape(exp_o, [-1, 1])) diff --git a/tests/unit/test_ask_tell_optimization.py b/tests/unit/test_ask_tell_optimization.py index b696d37c1b..973f9ce352 100644 --- a/tests/unit/test_ask_tell_optimization.py +++ b/tests/unit/test_ask_tell_optimization.py @@ -438,6 +438,46 @@ def __deepcopy__(self, memo: dict[int, object]) -> _UncopyableModel: assert ask_tell.to_result(copy=False).final_result.is_ok +class DatasetChecker(QuadraticMeanAndRBFKernel, PseudoTrainableProbModel): + def __init__( + self, + use_global_model: bool, + use_global_init_dataset: bool, + init_data: Mapping[Tag, Dataset], + query_points: TensorType, + ) -> None: + super().__init__() + self.update_count = 0 + self._tag = OBJECTIVE + self.use_global_model = use_global_model + self.use_global_init_dataset = use_global_init_dataset + self.init_data = init_data + self.query_points = query_points + + def update(self, dataset: Dataset) -> None: + if self.use_global_model: + exp_init_qps = self.init_data[OBJECTIVE].query_points + else: + if self.use_global_init_dataset: + exp_init_qps = self.init_data[OBJECTIVE].query_points + else: + exp_init_qps = self.init_data[self._tag].query_points + + if self.update_count == 0: + # Initial model training. + exp_qps = exp_init_qps + else: + # Subsequent model training. + if self.use_global_model: + exp_qps = tf.concat([exp_init_qps, tf.reshape(self.query_points, [-1, 1])], 0) + else: + index = LocalizedTag.from_tag(self._tag).local_index + exp_qps = tf.concat([exp_init_qps, self.query_points[:, index]], 0) + + npt.assert_array_equal(exp_qps, dataset.query_points) + self.update_count += 1 + + # Check that the correct dataset is routed to the model. # Note: this test is almost identical to the one in test_bayesian_optimizer.py. @pytest.mark.parametrize("use_global_model", [True, False]) @@ -461,38 +501,9 @@ def test_ask_tell_optimizer_creates_correct_datasets_for_rank3_points( (num_query_points_per_batch, batch_size, 1), ) - class DatasetChecker(QuadraticMeanAndRBFKernel, PseudoTrainableProbModel): - def __init__(self) -> None: - super().__init__() - self.update_count = 0 - self._tag = OBJECTIVE - - def update(self, dataset: Dataset) -> None: - if use_global_model: - exp_init_qps = init_data[OBJECTIVE].query_points - else: - if use_global_init_dataset: - exp_init_qps = init_data[OBJECTIVE].query_points - else: - exp_init_qps = init_data[self._tag].query_points - - if self.update_count == 0: - # Initial model training. - exp_qps = exp_init_qps - else: - # Subsequent model training. 
- if use_global_model: - exp_qps = tf.concat([exp_init_qps, tf.reshape(query_points, [-1, 1])], 0) - else: - index = LocalizedTag.from_tag(self._tag).local_index - exp_qps = tf.concat([exp_init_qps, query_points[:, index]], 0) - - npt.assert_array_equal(exp_qps, dataset.query_points) - self.update_count += 1 - search_space = Box([-1], [1]) - model = DatasetChecker() + model = DatasetChecker(use_global_model, use_global_init_dataset, init_data, query_points) if use_global_model: models = {OBJECTIVE: model} else: diff --git a/tests/unit/test_bayesian_optimizer.py b/tests/unit/test_bayesian_optimizer.py index 26d7bd610b..41c4dac98d 100644 --- a/tests/unit/test_bayesian_optimizer.py +++ b/tests/unit/test_bayesian_optimizer.py @@ -23,6 +23,7 @@ import tensorflow as tf from check_shapes import inherit_check_shapes +from tests.unit.test_ask_tell_optimization import DatasetChecker from tests.util.misc import ( FixedAcquisitionRule, assert_datasets_allclose, @@ -261,38 +262,9 @@ def test_bayesian_optimizer_creates_correct_datasets_for_rank3_points( (num_query_points_per_batch, batch_size, 1), ) - class DatasetChecker(QuadraticMeanAndRBFKernel, PseudoTrainableProbModel): - def __init__(self) -> None: - super().__init__() - self.update_count = 0 - self._tag = OBJECTIVE - - def update(self, dataset: Dataset) -> None: - if use_global_model: - exp_init_qps = init_data[OBJECTIVE].query_points - else: - if use_global_init_dataset: - exp_init_qps = init_data[OBJECTIVE].query_points - else: - exp_init_qps = init_data[self._tag].query_points - - if self.update_count == 0: - # Initial model training. - exp_qps = exp_init_qps - else: - # Subsequent model training. - if use_global_model: - exp_qps = tf.concat([exp_init_qps, tf.reshape(query_points, [-1, 1])], 0) - else: - index = LocalizedTag.from_tag(self._tag).local_index - exp_qps = tf.concat([exp_init_qps, query_points[:, index]], 0) - - npt.assert_array_equal(exp_qps, dataset.query_points) - self.update_count += 1 - search_space = Box([-1], [1]) - model = DatasetChecker() + model = DatasetChecker(use_global_model, use_global_init_dataset, init_data, query_points) if use_global_model: models = {OBJECTIVE: model} else: From efd2fc091402747950a6c5187df23712dcaf9992 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Fri, 24 Nov 2023 12:57:36 +0000 Subject: [PATCH 30/33] Address more feedback --- trieste/acquisition/rule.py | 100 +++++++++++++----------------------- 1 file changed, 36 insertions(+), 64 deletions(-) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index de453c0daf..38e0bad86f 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -20,13 +20,12 @@ import copy import math from abc import ABC, abstractmethod -from collections import defaultdict +from collections import Counter from collections.abc import Mapping from dataclasses import dataclass from typing import ( Any, Callable, - Dict, Generic, Optional, Sequence, @@ -95,6 +94,9 @@ SearchSpaceType = TypeVar("SearchSpaceType", bound=SearchSpace, contravariant=True) """ Contravariant type variable bound to :class:`~trieste.space.SearchSpace`. """ +T = TypeVar("T") +""" Unbound type variable. """ + class AcquisitionRule(ABC, Generic[ResultType, SearchSpaceType, ProbabilisticModelType]): """ @@ -164,7 +166,8 @@ def acquire_single( def filter_datasets(self, datasets: Mapping[Tag, Dataset]) -> Mapping[Tag, Dataset]: """ - Filter the datasets. + Filter the post-acquisition datasets before they are used for model training. 
For example,
+        this can be used to remove points from the datasets that are no longer in the search space.
 
         :param datasets: The datasets to filter.
         :return: The filtered datasets.
@@ -1017,71 +1020,38 @@ def _get_tags(self, tags: Set[Tag]) -> Tuple[Set[Tag], Set[Tag]]:
                 local_gtags.add(ltag.global_tag)
 
         # Only keep global tags that don't have a matching local tag.
-        global_tags = global_tags.difference(local_gtags)
+        global_tags -= local_gtags
 
         return local_gtags, global_tags
 
-    def select_models(
-        self, models: Optional[Mapping[Tag, ProbabilisticModelType]]
-    ) -> Optional[Mapping[Tag, ProbabilisticModelType]]:
-        """
-        Select models belonging to this region for acquisition.
-
-        :param models: The model for each tag.
-        :return: The models belonging to this region.
-        """
-        if models is None:
-            _models = {}
-        elif self.region_index is None:
-            # If no index, then return the global models.
-            _models = {
-                tag: model
-                for tag, model in models.items()
-                if not LocalizedTag.from_tag(tag).is_local
-            }
-        else:
-            # Prefer matching local model for each tag, otherwise select the global model.
-            local_gtags, global_tags = self._get_tags(set(models))
-
-            _models = {}
-            for tag in local_gtags:
-                ltag = LocalizedTag(tag, self.region_index)
-                _models[ltag] = models[ltag]
-            for tag in global_tags:
-                _models[tag] = models[tag]
-
-        return _models if _models else None
-
-    def select_datasets(
-        self, datasets: Optional[Mapping[Tag, Dataset]]
-    ) -> Optional[Mapping[Tag, Dataset]]:
+    def select_in_region(self, mapping: Optional[Mapping[Tag, T]]) -> Optional[Mapping[Tag, T]]:
        """
-        Select datasets belonging to this region for acquisition.
+        Select items belonging to this region for acquisition.
 
-        :param datasets: The dataset for each tag.
-        :return: The datasets belonging to this region.
+        :param mapping: The mapping of items for each tag.
+        :return: The items belonging to this region (or `None` if there aren't any).
        """
-        if datasets is None:
-            _datasets = {}
+        if mapping is None:
+            _mapping = {}
        elif self.region_index is None:
-            # If no index, then return the global datasets.
-            _datasets = {
-                tag: dataset
-                for tag, dataset in datasets.items()
+            # If no index, then return the global items.
+            _mapping = {
+                tag: item
+                for tag, item in mapping.items()
                 if not LocalizedTag.from_tag(tag).is_local
            }
        else:
-            # Prefer matching local dataset for each tag, otherwise select the global dataset.
-            local_gtags, global_tags = self._get_tags(set(datasets))
+            # Prefer matching local item for each tag, otherwise select the global item.
+            local_gtags, global_tags = self._get_tags(set(mapping))
 
-            _datasets = {}
+            _mapping = {}
            for tag in local_gtags:
                ltag = LocalizedTag(tag, self.region_index)
-                _datasets[ltag] = datasets[ltag]
+                _mapping[ltag] = mapping[ltag]
            for tag in global_tags:
-                _datasets[tag] = datasets[tag]
+                _mapping[tag] = mapping[tag]
 
-        return _datasets if _datasets else None
+        return _mapping if _mapping else None
 
     def get_datasets_filter_mask(
         self, datasets: Optional[Mapping[Tag, Dataset]]
@@ -1166,6 +1136,8 @@ def __init__(
         self._tags = tuple([str(index) for index in range(len(init_subspaces))])
         self._rule = rule
 
+        # The rules for each subspace. These are only used when we have local models, to run the
+        # base rule sequentially for each subspace. These are set in `acquire`.
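+        # Each subspace gets its own deep copy of the base rule, so any state a
+        # (stateful) rule builds up stays local to that region.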
self._rules: Optional[ Sequence[AcquisitionRule[TensorType, SearchSpace, ProbabilisticModelType]] ] = None @@ -1201,16 +1173,16 @@ def acquire( assert self._tags is not None assert self._init_subspaces is not None - num_local_models: Dict[Tag, int] = defaultdict(int) - for tag in models: - ltag = LocalizedTag.from_tag(tag) - if ltag.is_local: - num_local_models[ltag.global_tag] += 1 + num_local_models = Counter( + LocalizedTag.from_tag(tag).global_tag + for tag in models + if LocalizedTag.from_tag(tag).is_local + ) num_local_models_vals = set(num_local_models.values()) assert ( len(num_local_models_vals) <= 1 ), f"The number of local models should be the same for all tags, got {num_local_models}" - _num_local_models = 0 if len(num_local_models_vals) == 0 else num_local_models_vals.pop() + _num_local_models = sum(num_local_models_vals) num_subspaces = len(self._tags) assert _num_local_models in [0, num_subspaces], ( @@ -1222,7 +1194,7 @@ def acquire( # Otherwise, run the base rule as is, once with all models and datasets. # Note: this should only trigger on the first call to `acquire`, as after that we will # have a list of rules in `self._rules`. - if _num_local_models > 0: + if _num_local_models > 0 and self._rules is None: self._rules = [copy.deepcopy(self._rule) for _ in range(num_subspaces)] def state_func( @@ -1268,8 +1240,8 @@ def state_func( if self._rules is not None: _points = [] for subspace, rule in zip(subspaces, self._rules): - _models = subspace.select_models(models) - _datasets = subspace.select_datasets(datasets) + _models = subspace.select_in_region(models) + _datasets = subspace.select_in_region(datasets) assert _models is not None # Remap all local tags to global ones. One reason is that single model # acquisition builders expect OBJECTIVE to exist. @@ -1444,7 +1416,7 @@ def initialize( Initialize the box by sampling a location from the global search space and setting the bounds. """ - datasets = self.select_datasets(datasets) + datasets = self.select_in_region(datasets) self.location = tf.squeeze(self.global_search_space.sample(1), axis=0) self._step_is_success = False @@ -1468,7 +1440,7 @@ def update( ``1 / beta``. Conversely, if it was unsuccessful, the size is reduced by the factor ``beta``. """ - datasets = self.select_datasets(datasets) + datasets = self.select_in_region(datasets) if tf.reduce_any(self.eps < self._min_eps): self.initialize(models, datasets) From 505631af9785635b6d14ca73fb9afcd7139f272d Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Wed, 29 Nov 2023 18:26:51 +0000 Subject: [PATCH 31/33] Avoid default num_models in integ tests --- .../integration/test_ask_tell_optimization.py | 87 +++++++----------- .../integration/test_bayesian_optimization.py | 92 ++++++++----------- trieste/acquisition/rule.py | 2 +- 3 files changed, 72 insertions(+), 109 deletions(-) diff --git a/tests/integration/test_ask_tell_optimization.py b/tests/integration/test_ask_tell_optimization.py index c1460612ef..f8b4203dc9 100644 --- a/tests/integration/test_ask_tell_optimization.py +++ b/tests/integration/test_ask_tell_optimization.py @@ -16,7 +16,7 @@ import copy import pickle import tempfile -from typing import Callable +from typing import Callable, Tuple import numpy.testing as npt import pytest @@ -49,30 +49,27 @@ # We use a copy of these for a quicker test against a simple quadratic function # (copying is necessary as some of the acquisition rules are stateful). 
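# Each entry is either a rule factory, or a (rule factory, num_models) pair for rules
# that should run with that many local models; see the tuple handling in
# _test_ask_tell_optimization_finds_minima below.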
OPTIMIZER_PARAMS = ( - "num_steps, reload_state, acquisition_rule_fn, num_models", + "num_steps, reload_state, acquisition_rule_fn", [ pytest.param( - 20, False, lambda: EfficientGlobalOptimization(), 1, id="EfficientGlobalOptimization" + 20, False, lambda: EfficientGlobalOptimization(), id="EfficientGlobalOptimization" ), pytest.param( 20, True, lambda: EfficientGlobalOptimization(), - 1, id="EfficientGlobalOptimization/reload_state", ), pytest.param( 15, False, lambda: BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)), - 1, id="TREGO", ), pytest.param( 16, True, lambda: BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)), - 1, id="TREGO/reload_state", ), pytest.param( @@ -85,20 +82,21 @@ num_query_points=3, ), ), - 1, id="BatchTrustRegionBox", ), pytest.param( 10, False, - lambda: BatchTrustRegionBox( - [SingleObjectiveTrustRegionBox(ScaledBranin.search_space) for _ in range(3)], - EfficientGlobalOptimization( - ParallelContinuousThompsonSampling(), - num_query_points=2, + ( + lambda: BatchTrustRegionBox( + [SingleObjectiveTrustRegionBox(ScaledBranin.search_space) for _ in range(3)], + EfficientGlobalOptimization( + ParallelContinuousThompsonSampling(), + num_query_points=2, + ), ), + 3, ), - 3, id="BatchTrustRegionBox/LocalModels", ), pytest.param( @@ -110,7 +108,6 @@ ).using(OBJECTIVE), num_query_points=3, ), - 1, id="LocalPenalization", ), pytest.param( @@ -121,22 +118,14 @@ ScaledBranin.search_space, ).using(OBJECTIVE), ), - 1, id="LocalPenalization/AsynchronousGreedy", ), ], ) -@random_seed -@pytest.mark.slow # to run this, add --runslow yes to the pytest command -@pytest.mark.parametrize(*OPTIMIZER_PARAMS) -def test_ask_tell_optimizer_finds_minima_of_the_scaled_branin_function( - num_steps: int, - reload_state: bool, - acquisition_rule_fn: Callable[ - [], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel] - ] +AcquisitionRuleFunction = ( + Callable[[], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel]] | Callable[ [], AcquisitionRule[ @@ -144,12 +133,19 @@ def test_ask_tell_optimizer_finds_minima_of_the_scaled_branin_function( Box, TrainableProbabilisticModel, ], - ], - num_models: int, + ] +) + + +@random_seed +@pytest.mark.slow # to run this, add --runslow yes to the pytest command +@pytest.mark.parametrize(*OPTIMIZER_PARAMS) +def test_ask_tell_optimizer_finds_minima_of_the_scaled_branin_function( + num_steps: int, + reload_state: bool, + acquisition_rule_fn: AcquisitionRuleFunction | Tuple[AcquisitionRuleFunction, int], ) -> None: - _test_ask_tell_optimization_finds_minima( - True, num_steps, reload_state, acquisition_rule_fn, num_models - ) + _test_ask_tell_optimization_finds_minima(True, num_steps, reload_state, acquisition_rule_fn) @random_seed @@ -157,23 +153,12 @@ def test_ask_tell_optimizer_finds_minima_of_the_scaled_branin_function( def test_ask_tell_optimizer_finds_minima_of_simple_quadratic( num_steps: int, reload_state: bool, - acquisition_rule_fn: Callable[ - [], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel] - ] - | Callable[ - [], - AcquisitionRule[ - State[TensorType, AsynchronousRuleState | BatchTrustRegionBox.State], - Box, - TrainableProbabilisticModel, - ], - ], - num_models: int, + acquisition_rule_fn: AcquisitionRuleFunction | Tuple[AcquisitionRuleFunction, int], ) -> None: # for speed reasons we sometimes test with a simple quadratic defined on the same search space # branin; currently assume that every rule should be able to solve this in 5 steps 
_test_ask_tell_optimization_finds_minima(
-        False, min(num_steps, 5), reload_state, acquisition_rule_fn, num_models
+        False, min(num_steps, 5), reload_state, acquisition_rule_fn
     )
 
 
@@ -181,18 +166,7 @@ def _test_ask_tell_optimization_finds_minima(
     optimize_branin: bool,
     num_steps: int,
     reload_state: bool,
-    acquisition_rule_fn: Callable[
-        [], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel]
-    ]
-    | Callable[
-        [],
-        AcquisitionRule[
-            State[TensorType, AsynchronousRuleState | BatchTrustRegionBox.State],
-            Box,
-            TrainableProbabilisticModel,
-        ],
-    ],
-    num_models: int,
+    acquisition_rule_fn: AcquisitionRuleFunction | Tuple[AcquisitionRuleFunction, int],
 ) -> None:
     # For the case when optimization state is saved and reload on each iteration
     # we need to use new acquisition function object to imitate real life usage
@@ -204,6 +178,11 @@ def _test_ask_tell_optimization_finds_minima(
     batch_observer = mk_batch_observer(observer)
     initial_data = observer(initial_query_points)
 
+    if isinstance(acquisition_rule_fn, tuple):
+        acquisition_rule_fn, num_models = acquisition_rule_fn
+    else:
+        num_models = 1
+
     model = GaussianProcessRegression(
         build_gpr(initial_data, search_space, likelihood_variance=1e-7)
     )
diff --git a/tests/integration/test_bayesian_optimization.py b/tests/integration/test_bayesian_optimization.py
index 645c1049ec..237539ffb4 100644
--- a/tests/integration/test_bayesian_optimization.py
+++ b/tests/integration/test_bayesian_optimization.py
@@ -1,4 +1,4 @@
-# Copyright 2021 The Trieste Contributors
+# Copyright 2021 The Trieste Contributors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -99,13 +99,12 @@
 # (regenerating is necessary as some of the acquisition rules are stateful).
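# As in the ask-tell tests above, a parameter entry may be a (rule, num_models) tuple
# when the rule should be exercised with that many local models.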
def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: return ( - "num_steps, acquisition_rule, num_models", + "num_steps, acquisition_rule", [ - pytest.param(20, EfficientGlobalOptimization(), 1, id="EfficientGlobalOptimization"), + pytest.param(20, EfficientGlobalOptimization(), id="EfficientGlobalOptimization"), pytest.param( 30, EfficientGlobalOptimization(AugmentedExpectedImprovement().using(OBJECTIVE)), - 1, id="AugmentedExpectedImprovement", ), pytest.param( @@ -114,7 +113,6 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: MonteCarloExpectedImprovement(int(1e3)).using(OBJECTIVE), generate_continuous_optimizer(100), ), - 1, id="MonteCarloExpectedImprovement", ), pytest.param( @@ -125,7 +123,6 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: min_value_sampler=ThompsonSamplerFromTrajectory(sample_min_value=True), ).using(OBJECTIVE) ), - 1, id="MinValueEntropySearch", ), pytest.param( @@ -134,7 +131,6 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: BatchExpectedImprovement(sample_size=100).using(OBJECTIVE), num_query_points=3, ), - 1, id="BatchExpectedImprovement", ), pytest.param( @@ -143,11 +139,10 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: BatchMonteCarloExpectedImprovement(sample_size=500).using(OBJECTIVE), num_query_points=3, ), - 1, id="BatchMonteCarloExpectedImprovement", ), pytest.param( - 12, AsynchronousOptimization(num_query_points=3), 1, id="AsynchronousOptimization" + 12, AsynchronousOptimization(num_query_points=3), id="AsynchronousOptimization" ), pytest.param( 15, @@ -157,7 +152,6 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE), num_query_points=3, ), - 1, id="LocalPenalization", ), pytest.param( @@ -168,7 +162,6 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE), num_query_points=3, ), - 1, id="LocalPenalization/AsynchronousGreedy", ), pytest.param( @@ -179,7 +172,6 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE), num_query_points=2, ), - 1, id="GIBBON", ), pytest.param( @@ -190,13 +182,11 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE), num_query_points=3, ), - 1, id="MultipleOptimismNegativeLowerConfidenceBound", ), pytest.param( 20, BatchTrustRegionBox(TREGOBox(ScaledBranin.search_space)), - 1, id="TREGO", ), pytest.param( @@ -209,7 +199,6 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ).using(OBJECTIVE) ), ), - 1, id="TREGO/MinValueEntropySearch", ), pytest.param( @@ -221,13 +210,11 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: num_query_points=3, ), ), - 1, id="TREGO/ParallelContinuousThompsonSampling", ), pytest.param( 10, TURBO(ScaledBranin.search_space, rule=DiscreteThompsonSampling(500, 3)), - 1, id="Turbo", ), pytest.param( @@ -239,29 +226,32 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: num_query_points=3, ), ), - 1, id="BatchTrustRegionBox", ), pytest.param( 10, - BatchTrustRegionBox( - [SingleObjectiveTrustRegionBox(ScaledBranin.search_space) for _ in range(3)], - EfficientGlobalOptimization( - ParallelContinuousThompsonSampling(), - num_query_points=2, + ( + BatchTrustRegionBox( + [ + SingleObjectiveTrustRegionBox(ScaledBranin.search_space) + for _ in range(3) + ], + EfficientGlobalOptimization( + ParallelContinuousThompsonSampling(), + num_query_points=2, + ), ), + 3, ), - 3, id="BatchTrustRegionBox/LocalModels", ), - pytest.param(15, DiscreteThompsonSampling(500, 5), 1, 
id="DiscreteThompsonSampling"), + pytest.param(15, DiscreteThompsonSampling(500, 5), id="DiscreteThompsonSampling"), pytest.param( 15, EfficientGlobalOptimization( Fantasizer(), num_query_points=3, ), - 1, id="Fantasizer", ), pytest.param( @@ -270,7 +260,6 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: GreedyContinuousThompsonSampling(), num_query_points=5, ), - 1, id="GreedyContinuousThompsonSampling", ), pytest.param( @@ -279,13 +268,11 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ParallelContinuousThompsonSampling(), num_query_points=5, ), - 1, id="ParallelContinuousThompsonSampling", ), pytest.param( 15, BatchHypervolumeSharpeRatioIndicator() if pymoo else None, - 1, id="BatchHypevolumeSharpeRatioIndicator", marks=pytest.mark.qhsri, ), @@ -293,25 +280,29 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ) +AcquisitionRuleType = ( + AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModelType] + | AcquisitionRule[ + State[TensorType, AsynchronousRuleState | BatchTrustRegion.State], + Box, + TrainableProbabilisticModelType, + ] +) + + @random_seed @pytest.mark.slow # to run this, add --runslow yes to the pytest command @pytest.mark.parametrize(*GPR_OPTIMIZER_PARAMS()) def test_bayesian_optimizer_with_gpr_finds_minima_of_scaled_branin( num_steps: int, - acquisition_rule: AcquisitionRule[TensorType, SearchSpace, GaussianProcessRegression] - | AcquisitionRule[ - State[TensorType, AsynchronousRuleState | BatchTrustRegion.State], - Box, - GaussianProcessRegression, - ], - num_models: int, + acquisition_rule: AcquisitionRuleType[GaussianProcessRegression] + | Tuple[AcquisitionRuleType[GaussianProcessRegression], int], ) -> None: _test_optimizer_finds_minimum( GaussianProcessRegression, num_steps, acquisition_rule, optimize_branin=True, - num_models=num_models, ) @@ -319,19 +310,12 @@ def test_bayesian_optimizer_with_gpr_finds_minima_of_scaled_branin( @pytest.mark.parametrize(*GPR_OPTIMIZER_PARAMS()) def test_bayesian_optimizer_with_gpr_finds_minima_of_simple_quadratic( num_steps: int, - acquisition_rule: AcquisitionRule[TensorType, SearchSpace, GaussianProcessRegression] - | AcquisitionRule[ - State[TensorType, AsynchronousRuleState | BatchTrustRegion.State], - Box, - GaussianProcessRegression, - ], - num_models: int, + acquisition_rule: AcquisitionRuleType[GaussianProcessRegression] + | Tuple[AcquisitionRuleType[GaussianProcessRegression], int], ) -> None: # for speed reasons we sometimes test with a simple quadratic defined on the same search space # branin; currently assume that every rule should be able to solve this in 6 steps - _test_optimizer_finds_minimum( - GaussianProcessRegression, min(num_steps, 6), acquisition_rule, num_models=num_models - ) + _test_optimizer_finds_minimum(GaussianProcessRegression, min(num_steps, 6), acquisition_rule) @random_seed @@ -595,16 +579,11 @@ def test_bayesian_optimizer_with_PCTS_and_deep_ensemble_finds_minima_of_simple_q def _test_optimizer_finds_minimum( model_type: Type[TrainableProbabilisticModelType], num_steps: Optional[int], - acquisition_rule: AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModelType] - | AcquisitionRule[ - State[TensorType, AsynchronousRuleState | BatchTrustRegion.State], - Box, - TrainableProbabilisticModelType, - ], + acquisition_rule: AcquisitionRuleType[TrainableProbabilisticModelType] + | Tuple[AcquisitionRuleType[TrainableProbabilisticModelType], int], optimize_branin: bool = False, model_args: Optional[Mapping[str, Any]] = None, check_regret: 
bool = False, - num_models: int = 1, ) -> None: model_args = model_args or {} @@ -630,6 +609,11 @@ def _test_optimizer_finds_minimum( observer = mk_observer(ScaledBranin.objective if optimize_branin else SimpleQuadratic.objective) initial_data = observer(initial_query_points) + if isinstance(acquisition_rule, tuple): + acquisition_rule, num_models = acquisition_rule + else: + num_models = 1 + model: TrainableProbabilisticModel # (really TPMType, but that's too complicated for mypy) if model_type is GaussianProcessRegression: diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 38e0bad86f..3c4376d727 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -1072,7 +1072,7 @@ def get_datasets_filter_mask( if datasets is None: return None else: - # Only keep points that are in the box. + # Only keep points that are in the region. return { tag: self.contains(dataset.query_points) for tag, dataset in datasets.items() From b90d3a193f7d8a80db9e4410b584cac5a89bb909 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Wed, 29 Nov 2023 19:48:05 +0000 Subject: [PATCH 32/33] Fix old python typing issue --- tests/integration/test_ask_tell_optimization.py | 14 +++++++------- tests/integration/test_bayesian_optimization.py | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_ask_tell_optimization.py b/tests/integration/test_ask_tell_optimization.py index f8b4203dc9..3be8bc28d7 100644 --- a/tests/integration/test_ask_tell_optimization.py +++ b/tests/integration/test_ask_tell_optimization.py @@ -16,7 +16,7 @@ import copy import pickle import tempfile -from typing import Callable, Tuple +from typing import Callable, Tuple, Union import numpy.testing as npt import pytest @@ -124,17 +124,17 @@ ) -AcquisitionRuleFunction = ( - Callable[[], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel]] - | Callable[ +AcquisitionRuleFunction = Union[ + Callable[[], AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModel]], + Callable[ [], AcquisitionRule[ - State[TensorType, AsynchronousRuleState | BatchTrustRegionBox.State], + State[TensorType, Union[AsynchronousRuleState, BatchTrustRegionBox.State]], Box, TrainableProbabilisticModel, ], - ] -) + ], +] @random_seed diff --git a/tests/integration/test_bayesian_optimization.py b/tests/integration/test_bayesian_optimization.py index 237539ffb4..d6ff399889 100644 --- a/tests/integration/test_bayesian_optimization.py +++ b/tests/integration/test_bayesian_optimization.py @@ -16,7 +16,7 @@ import tempfile from functools import partial from pathlib import Path -from typing import Any, List, Mapping, Optional, Tuple, Type, cast +from typing import Any, List, Mapping, Optional, Tuple, Type, Union, cast import dill import gpflow @@ -280,14 +280,14 @@ def GPR_OPTIMIZER_PARAMS() -> Tuple[str, List[ParameterSet]]: ) -AcquisitionRuleType = ( - AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModelType] - | AcquisitionRule[ - State[TensorType, AsynchronousRuleState | BatchTrustRegion.State], +AcquisitionRuleType = Union[ + AcquisitionRule[TensorType, SearchSpace, TrainableProbabilisticModelType], + AcquisitionRule[ + State[TensorType, Union[AsynchronousRuleState, BatchTrustRegion.State]], Box, TrainableProbabilisticModelType, - ] -) + ], +] @random_seed From a75e618679891beb493833bab77611d0ae719c58 Mon Sep 17 00:00:00 2001 From: Khurram Ghani Date: Wed, 13 Dec 2023 18:27:51 +0000 Subject: [PATCH 33/33] Use flatten_... 
func and add comment --- trieste/acquisition/rule.py | 4 ++-- trieste/objectives/utils.py | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/trieste/acquisition/rule.py b/trieste/acquisition/rule.py index 3c4376d727..dc172ab2a6 100644 --- a/trieste/acquisition/rule.py +++ b/trieste/acquisition/rule.py @@ -1010,8 +1010,8 @@ def update( def _get_tags(self, tags: Set[Tag]) -> Tuple[Set[Tag], Set[Tag]]: # Separate tags into local (matching index) and global tags (without matching # local tag). - local_gtags = set() - global_tags = set() + local_gtags = set() # Set of global part of all local tags. + global_tags = set() # Set of all global tags. for tag in tags: ltag = LocalizedTag.from_tag(tag) if not ltag.is_local: diff --git a/trieste/objectives/utils.py b/trieste/objectives/utils.py index d765feb13f..b074738ed1 100644 --- a/trieste/objectives/utils.py +++ b/trieste/objectives/utils.py @@ -22,13 +22,12 @@ from collections.abc import Callable from typing import Mapping, Optional, Union, overload -import tensorflow as tf from check_shapes import check_shapes from ..data import Dataset from ..observer import OBJECTIVE, MultiObserver, Observer, SingleObserver from ..types import Tag, TensorType -from ..utils.misc import LocalizedTag +from ..utils.misc import LocalizedTag, flatten_leading_dims @overload @@ -83,7 +82,7 @@ def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: # Call objective with rank 2 query points by flattening batch dimension. # Some objectives might only expect rank 2 query points, so this is safer. batch_size = qps.shape[1] - flat_qps = tf.reshape(qps, [-1, qps.shape[-1]]) + flat_qps, unflatten = flatten_leading_dims(qps) obs_or_dataset = objective_or_observer(flat_qps) if not isinstance(obs_or_dataset, (Mapping, Dataset)): @@ -98,8 +97,8 @@ def _observer(qps: TensorType) -> Mapping[Tag, Dataset]: for key, dataset in obs_or_dataset.items(): # Include overall dataset and per batch dataset. flat_obs = dataset.observations - qps = tf.reshape(flat_qps, [-1, batch_size, flat_qps.shape[-1]]) - obs = tf.reshape(flat_obs, [-1, batch_size, flat_obs.shape[-1]]) + qps = unflatten(flat_qps) + obs = unflatten(flat_obs) datasets[key] = dataset for i in range(batch_size): datasets[LocalizedTag(key, i)] = Dataset(qps[:, i], obs[:, i])
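
For reference, here is a minimal usage sketch of the batch observer behaviour implemented by this final patch. It is not part of the patch series itself; it assumes the trieste API as modified above, including `mk_batch_observer` defaulting to the `OBJECTIVE` key, and shows the global plus per-batch-index local datasets produced for rank 3 query points:

    import tensorflow as tf

    from trieste.data import Dataset
    from trieste.objectives.utils import mk_batch_observer
    from trieste.observer import OBJECTIVE
    from trieste.utils.misc import LocalizedTag

    # Wrap a single observer; rank 3 points have shape [num_query_points, batch_size, dim].
    observer = mk_batch_observer(lambda x: Dataset(x, x))
    qps = tf.reshape(tf.range(6.0, dtype=tf.float64), [3, 2, 1])
    ys = observer(qps)

    # One flattened global dataset, plus one local dataset per batch index.
    assert set(ys) == {OBJECTIVE, LocalizedTag(OBJECTIVE, 0), LocalizedTag(OBJECTIVE, 1)}
    assert ys[OBJECTIVE].query_points.shape == [6, 1]
    assert ys[LocalizedTag(OBJECTIVE, 0)].query_points.shape == [3, 1]

The local datasets are what `BatchTrustRegion` routes to each region's model via `select_in_region`, while the global dataset is kept for rules and models that are not region-aware.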