From f07f2ea50021f3e576560d0cbb242652e0ab291a Mon Sep 17 00:00:00 2001 From: uri-granta <50578464+uri-granta@users.noreply.github.com> Date: Wed, 21 Aug 2024 08:30:48 +0100 Subject: [PATCH] Encoded models (#864) --- .../test_mixed_space_bayesian_optimization.py | 93 +++++++++- ...est_multifidelity_bayesian_optimization.py | 10 +- tests/unit/models/conftest.py | 8 +- tests/unit/models/gpflow/test_interface.py | 16 +- tests/unit/models/test_interfaces.py | 96 ++++++++++ .../unit/objectives/test_multi_objectives.py | 7 +- .../unit/objectives/test_single_objectives.py | 11 +- tests/unit/test_space.py | 36 ++-- trieste/acquisition/optimizer.py | 10 +- trieste/models/gpflow/builders.py | 19 +- trieste/models/gpflow/interface.py | 60 +++++-- trieste/models/gpflow/models.py | 98 +++++----- trieste/models/interfaces.py | 168 +++++++++++++++++- trieste/models/keras/interface.py | 29 ++- trieste/models/keras/models.py | 36 ++-- trieste/objectives/multi_objectives.py | 10 +- .../objectives/multifidelity_objectives.py | 4 +- trieste/objectives/single_objectives.py | 10 +- trieste/space.py | 21 ++- 19 files changed, 599 insertions(+), 143 deletions(-) diff --git a/tests/integration/test_mixed_space_bayesian_optimization.py b/tests/integration/test_mixed_space_bayesian_optimization.py index 616eb16508..84cbe3bf29 100644 --- a/tests/integration/test_mixed_space_bayesian_optimization.py +++ b/tests/integration/test_mixed_space_bayesian_optimization.py @@ -39,10 +39,17 @@ from trieste.bayesian_optimizer import BayesianOptimizer from trieste.models import TrainableProbabilisticModel from trieste.models.gpflow import GaussianProcessRegression, build_gpr -from trieste.objectives import ScaledBranin +from trieste.objectives import ScaledBranin, SingleObjectiveTestProblem +from trieste.objectives.single_objectives import scaled_branin from trieste.objectives.utils import mk_observer from trieste.observer import OBJECTIVE -from trieste.space import Box, DiscreteSearchSpace, TaggedProductSearchSpace +from trieste.space import ( + Box, + CategoricalSearchSpace, + DiscreteSearchSpace, + TaggedProductSearchSpace, + one_hot_encoder, +) from trieste.types import TensorType @@ -190,3 +197,85 @@ def test_optimizer_finds_minima_of_the_scaled_branin_function( acquisition_function = acquisition_rule._acquisition_function if isinstance(acquisition_function, AcquisitionFunctionClass): assert acquisition_function.__call__._get_tracing_count() <= 4 # type: ignore + + +def categorical_scaled_branin( + categories_to_points: TensorType, +) -> SingleObjectiveTestProblem[TaggedProductSearchSpace]: + """ + Generate a Scaled Branin test problem defined on the product of a categorical space and a + continuous space, with categories mapped to points using the given 1D tensor. 
+ """ + categorical_space = CategoricalSearchSpace([str(float(v)) for v in categories_to_points]) + continuous_space = Box([0], [1]) + search_space = TaggedProductSearchSpace( + spaces=[categorical_space, continuous_space], + tags=["discrete", "continuous"], + ) + + def objective(x: TensorType) -> TensorType: + points = tf.gather(categories_to_points, tf.cast(x[..., 0], tf.int32)) + x_mapped = tf.concat([tf.expand_dims(points, -1), x[..., 1:]], axis=-1) + return scaled_branin(x_mapped) + + minimizer_indices = [] + for minimizer0 in ScaledBranin.minimizers[..., 0]: + indices = tf.where(tf.equal(categories_to_points, minimizer0)) + minimizer_indices.append(indices[0][0]) + category_indices = tf.expand_dims(tf.convert_to_tensor(minimizer_indices, dtype=tf.float64), -1) + minimizers = tf.concat([category_indices, ScaledBranin.minimizers[..., 1:]], axis=-1) + + return SingleObjectiveTestProblem( + name="Categorical scaled Branin", + objective=objective, + search_space=search_space, + minimizers=minimizers, + minimum=ScaledBranin.minimum, + ) + + +@random_seed +@pytest.mark.parametrize( + "num_steps, acquisition_rule", + [ + pytest.param(25, EfficientGlobalOptimization(), id="EfficientGlobalOptimization"), + ], +) +def test_optimizer_finds_minima_of_the_categorical_scaled_branin_function( + num_steps: int, + acquisition_rule: AcquisitionRule[ + TensorType, TaggedProductSearchSpace, TrainableProbabilisticModel + ], +) -> None: + # 6 categories mapping to 3 random points plus the 3 minimizer points + points = tf.concat( + [tf.random.uniform([3], dtype=tf.float64), ScaledBranin.minimizers[..., 0]], 0 + ) + problem = categorical_scaled_branin(tf.random.shuffle(points)) + initial_query_points = problem.search_space.sample(5) + observer = mk_observer(problem.objective) + initial_data = observer(initial_query_points) + + # model uses one-hot encoding for the categorical inputs + encoder = one_hot_encoder(problem.search_space) + model = GaussianProcessRegression( + build_gpr(initial_data, problem.search_space, likelihood_variance=1e-8), + encoder=encoder, + ) + + dataset = ( + BayesianOptimizer(observer, problem.search_space) + .optimize(num_steps, initial_data, model, acquisition_rule) + .try_get_final_dataset() + ) + + arg_min_idx = tf.squeeze(tf.argmin(dataset.observations, axis=0)) + + best_y = dataset.observations[arg_min_idx] + best_x = dataset.query_points[arg_min_idx] + + relative_minimizer_err = tf.abs((best_x - problem.minimizers) / problem.minimizers) + assert tf.reduce_any( + tf.reduce_all(relative_minimizer_err < 0.1, axis=-1), axis=0 + ), relative_minimizer_err + npt.assert_allclose(best_y, problem.minimum, rtol=0.005) diff --git a/tests/integration/test_multifidelity_bayesian_optimization.py b/tests/integration/test_multifidelity_bayesian_optimization.py index 39202a6b4c..ac25dca63a 100644 --- a/tests/integration/test_multifidelity_bayesian_optimization.py +++ b/tests/integration/test_multifidelity_bayesian_optimization.py @@ -38,11 +38,13 @@ ) from trieste.objectives.utils import mk_observer from trieste.observer import SingleObserver -from trieste.space import TaggedProductSearchSpace +from trieste.space import SearchSpaceType, TaggedProductSearchSpace from trieste.types import TensorType -def _build_observer(problem: SingleObjectiveMultifidelityTestProblem) -> SingleObserver: +def _build_observer( + problem: SingleObjectiveMultifidelityTestProblem[SearchSpaceType], +) -> SingleObserver: objective_function = problem.objective def noisy_objective(x: TensorType) -> TensorType: @@ -57,7 
+59,7 @@ def noisy_objective(x: TensorType) -> TensorType: def _build_nested_multifidelity_dataset( - problem: SingleObjectiveMultifidelityTestProblem, observer: SingleObserver + problem: SingleObjectiveMultifidelityTestProblem[SearchSpaceType], observer: SingleObserver ) -> Dataset: num_fidelities = problem.num_fidelities initial_sample_sizes = [10 + 2 * (num_fidelities - i) for i in range(num_fidelities)] @@ -83,7 +85,7 @@ def _build_nested_multifidelity_dataset( @random_seed @pytest.mark.parametrize("problem", ((Linear2Fidelity), (Linear3Fidelity), (Linear5Fidelity))) def test_multifidelity_bo_finds_minima_of_linear_problem( - problem: SingleObjectiveMultifidelityTestProblem, + problem: SingleObjectiveMultifidelityTestProblem[SearchSpaceType], ) -> None: observer = _build_observer(problem) initial_data = _build_nested_multifidelity_dataset(problem, observer) diff --git a/tests/unit/models/conftest.py b/tests/unit/models/conftest.py index 9a41a18316..107a5e7193 100644 --- a/tests/unit/models/conftest.py +++ b/tests/unit/models/conftest.py @@ -43,6 +43,7 @@ VariationalGaussianProcess, ) from trieste.models.optimizer import DatasetTransformer, Optimizer +from trieste.space import EncoderFunction from trieste.types import TensorType @@ -58,12 +59,15 @@ ) def _gpflow_interface_factory(request: Any) -> ModelFactoryType: def model_interface_factory( - x: TensorType, y: TensorType, optimizer: Optimizer | None = None + x: TensorType, + y: TensorType, + optimizer: Optimizer | None = None, + encoder: EncoderFunction | None = None, ) -> tuple[GPflowPredictor, Callable[[TensorType, TensorType], GPModel]]: model_interface: Callable[..., GPflowPredictor] = request.param[0] base_model: GaussianProcessRegression = request.param[1](x, y) reference_model: Callable[[TensorType, TensorType], GPModel] = request.param[1] - return model_interface(base_model, optimizer=optimizer), reference_model + return model_interface(base_model, optimizer=optimizer, encoder=encoder), reference_model return model_interface_factory diff --git a/tests/unit/models/gpflow/test_interface.py b/tests/unit/models/gpflow/test_interface.py index d1f117d790..e55929bb17 100644 --- a/tests/unit/models/gpflow/test_interface.py +++ b/tests/unit/models/gpflow/test_interface.py @@ -24,6 +24,7 @@ from tests.util.misc import random_seed from trieste.data import Dataset from trieste.models.gpflow import BatchReparametrizationSampler, GPflowPredictor +from trieste.space import CategoricalSearchSpace, one_hot_encoder class _QuadraticPredictor(GPflowPredictor): @@ -31,10 +32,10 @@ class _QuadraticPredictor(GPflowPredictor): def model(self) -> GPModel: return _QuadraticGPModel() - def optimize(self, dataset: Dataset) -> None: + def optimize_encoded(self, dataset: Dataset) -> None: self.optimizer.optimize(self.model, dataset) - def update(self, dataset: Dataset) -> None: + def update_encoded(self, dataset: Dataset) -> None: return def log(self, dataset: Optional[Dataset] = None) -> None: @@ -112,3 +113,14 @@ def test_gpflow_reparam_sampler_returns_reparam_sampler_with_correct_samples() - linear_error = 1 / tf.sqrt(tf.cast(num_samples, tf.float32)) npt.assert_allclose(sample_mean, [[6.25]], rtol=linear_error) npt.assert_allclose(sample_variance, 1.0, rtol=2 * linear_error) + + +def test_gpflow_categorical_predict() -> None: + search_space = CategoricalSearchSpace(["Red", "Green", "Blue"]) + query_points = search_space.sample(10) + model = _QuadraticPredictor(encoder=one_hot_encoder(search_space)) + mean, variance = model.predict(query_points) + 
assert mean.shape == [10, 1] + assert variance.shape == [10, 1] + npt.assert_allclose(mean, [[1.0]] * 10, rtol=0.01) + npt.assert_allclose(variance, [[1.0]] * 10, rtol=0.01) diff --git a/tests/unit/models/test_interfaces.py b/tests/unit/models/test_interfaces.py index 6213f7e723..b650f62f16 100644 --- a/tests/unit/models/test_interfaces.py +++ b/tests/unit/models/test_interfaces.py @@ -15,6 +15,7 @@ from __future__ import annotations from collections.abc import Callable, Sequence +from typing import Optional import gpflow import numpy as np @@ -35,12 +36,17 @@ from trieste.data import Dataset from trieste.models import TrainableModelStack, TrainableProbabilisticModel from trieste.models.interfaces import ( + EncodedProbabilisticModel, + EncodedSupportsPredictJoint, + EncodedSupportsPredictY, + EncodedTrainableProbabilisticModel, TrainablePredictJointReparamModelStack, TrainablePredictYModelStack, TrainableSupportsPredictJoint, TrainableSupportsPredictJointHasReparamSampler, ) from trieste.models.utils import get_last_optimization_result, optimize_model_and_save_result +from trieste.space import EncoderFunction from trieste.types import TensorType @@ -216,3 +222,93 @@ def test_model_stack_reparam_sampler() -> None: npt.assert_allclose(var[..., :2], var01, rtol=0.04) npt.assert_allclose(var[..., 2:3], var2, rtol=0.04) npt.assert_allclose(var[..., 3:], var3, rtol=0.04) + + +class _EncodedModel( + EncodedTrainableProbabilisticModel, + EncodedSupportsPredictJoint, + EncodedSupportsPredictY, + EncodedProbabilisticModel, +): + def __init__(self, encoder: EncoderFunction | None = None) -> None: + self.dataset: Dataset | None = None + self._encoder = (lambda x: x + 1) if encoder is None else encoder + + @property + def encoder(self) -> EncoderFunction | None: + return self._encoder + + def predict_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + return query_points, query_points + + def sample_encoded(self, query_points: TensorType, num_samples: int) -> TensorType: + return tf.tile(tf.expand_dims(query_points, 0), [num_samples, 1, 1]) + + def log(self, dataset: Optional[Dataset] = None) -> None: + pass + + def update_encoded(self, dataset: Dataset) -> None: + self.dataset = dataset + + def optimize_encoded(self, dataset: Dataset) -> None: + self.dataset = dataset + + def predict_joint_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + b, d = query_points.shape + return query_points, tf.zeros([d, b, b]) + + def predict_y_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + return self.predict_encoded(query_points) + + +def test_encoded_probabilistic_model() -> None: + model = _EncodedModel() + query_points = tf.random.uniform([3, 5]) + mean, var = model.predict(query_points) + npt.assert_allclose(mean, query_points + 1) + npt.assert_allclose(var, query_points + 1) + samples = model.sample(query_points, 10) + assert len(samples) == 10 + for i in range(10): + npt.assert_allclose(samples[i], query_points + 1) + + +def test_encoded_trainable_probabilistic_model() -> None: + model = _EncodedModel() + assert model.dataset is None + for method in model.update, model.optimize: + query_points = tf.random.uniform([3, 5]) + observations = tf.random.uniform([3, 1]) + dataset = Dataset(query_points, observations) + method(dataset) + assert model.dataset is not None + # no idea why mypy thinks model.dataset couldn't have changed here + npt.assert_allclose( # type: ignore[unreachable] + model.dataset.query_points, query_points + 1 + ) + 
npt.assert_allclose(model.dataset.observations, observations) + + +def test_encoded_supports_predict_joint() -> None: + model = _EncodedModel() + query_points = tf.random.uniform([3, 5]) + mean, var = model.predict_joint(query_points) + npt.assert_allclose(mean, query_points + 1) + npt.assert_allclose(var, tf.zeros([5, 3, 3])) + + +def test_encoded_supports_predict_y() -> None: + model = _EncodedModel() + query_points = tf.random.uniform([3, 5]) + mean, var = model.predict_y(query_points) + npt.assert_allclose(mean, query_points + 1) + npt.assert_allclose(var, query_points + 1) + + +def test_encoded_probabilistic_model_keras_embedding() -> None: + encoder = tf.keras.layers.Embedding(3, 2) + model = _EncodedModel(encoder=encoder) + query_points = tf.random.uniform([3, 5], minval=0, maxval=3, dtype=tf.int32) + mean, var = model.predict(query_points) + assert mean.shape == (3, 5, 2) + npt.assert_allclose(mean, encoder(query_points)) diff --git a/tests/unit/objectives/test_multi_objectives.py b/tests/unit/objectives/test_multi_objectives.py index c3063be5bb..67d1fe98df 100644 --- a/tests/unit/objectives/test_multi_objectives.py +++ b/tests/unit/objectives/test_multi_objectives.py @@ -19,6 +19,7 @@ from check_shapes.exceptions import ShapeMismatchError from trieste.objectives.multi_objectives import DTLZ1, DTLZ2, VLMOP2, MultiObjectiveTestProblem +from trieste.space import SearchSpaceType from trieste.types import TensorType @@ -117,7 +118,7 @@ def test_dtlz2_has_expected_output( ], ) def test_gen_pareto_front_is_equal_to_math_defined( - obj_type: Callable[[int, int], MultiObjectiveTestProblem], + obj_type: Callable[[int, int], MultiObjectiveTestProblem[SearchSpaceType]], input_dim: int, num_obj: int, gen_pf_num: int, @@ -140,7 +141,7 @@ def test_gen_pareto_front_is_equal_to_math_defined( ], ) def test_func_raises_specified_input_dim_not_align_with_actual_input_dim( - obj_inst: MultiObjectiveTestProblem, actual_x: TensorType + obj_inst: MultiObjectiveTestProblem[SearchSpaceType], actual_x: TensorType ) -> None: with pytest.raises(ShapeMismatchError): obj_inst.objective(actual_x) @@ -160,7 +161,7 @@ def test_func_raises_specified_input_dim_not_align_with_actual_input_dim( @pytest.mark.parametrize("num_obs", [1, 5, 10]) @pytest.mark.parametrize("dtype", [tf.float32, tf.float64]) def test_objective_has_correct_shape_and_dtype( - problem: MultiObjectiveTestProblem, + problem: MultiObjectiveTestProblem[SearchSpaceType], input_dim: int, num_obj: int, num_obs: int, diff --git a/tests/unit/objectives/test_single_objectives.py b/tests/unit/objectives/test_single_objectives.py index fbe0c1e9b1..882f56336d 100644 --- a/tests/unit/objectives/test_single_objectives.py +++ b/tests/unit/objectives/test_single_objectives.py @@ -36,6 +36,7 @@ SingleObjectiveTestProblem, Trid10, ) +from trieste.space import Box, SearchSpaceType @pytest.fixture( @@ -58,12 +59,12 @@ Levy8, ], ) -def _problem_fixture(request: Any) -> Tuple[SingleObjectiveTestProblem, int]: +def _problem_fixture(request: Any) -> Tuple[SingleObjectiveTestProblem[SearchSpaceType], int]: return request.param def test_objective_maps_minimizers_to_minimum( - problem: SingleObjectiveTestProblem, + problem: SingleObjectiveTestProblem[SearchSpaceType], ) -> None: objective = problem.objective minimizers = problem.minimizers @@ -74,7 +75,7 @@ def test_objective_maps_minimizers_to_minimum( def test_no_function_values_are_less_than_global_minimum( - problem: SingleObjectiveTestProblem, + problem: SingleObjectiveTestProblem[Box], ) -> None: objective = 
problem.objective space = problem.search_space @@ -86,7 +87,7 @@ def test_no_function_values_are_less_than_global_minimum( @pytest.mark.parametrize("num_obs", [5, 1]) @pytest.mark.parametrize("dtype", [tf.float32, tf.float64]) def test_objective_has_correct_shape_and_dtype( - problem: SingleObjectiveTestProblem, + problem: SingleObjectiveTestProblem[SearchSpaceType], num_obs: int, dtype: tf.DType, ) -> None: @@ -120,7 +121,7 @@ def test_objective_has_correct_shape_and_dtype( ) @pytest.mark.parametrize("num_obs", [5, 1]) def test_search_space_has_correct_shape_and_default_dtype( - problem: SingleObjectiveTestProblem, + problem: SingleObjectiveTestProblem[SearchSpaceType], input_dim: int, num_obs: int, ) -> None: diff --git a/tests/unit/test_space.py b/tests/unit/test_space.py index b2caae1612..6fb473d130 100644 --- a/tests/unit/test_space.py +++ b/tests/unit/test_space.py @@ -1756,51 +1756,51 @@ def test_categorical_search_space__to_tags_raises_for_non_integers() -> None: [ ( CategoricalSearchSpace(["V"]), - tf.constant([[0], [0]]), - tf.constant([[1], [1]], dtype=tf.float32), + tf.constant([[0], [0]], dtype=tf.float64), + tf.constant([[1], [1]], dtype=tf.float64), ), ( - CategoricalSearchSpace(["R", "G", "B"]), - tf.constant([[0], [2], [1]]), + CategoricalSearchSpace(["R", "G", "B"], dtype=tf.float32), + tf.constant([[0], [2], [1]], dtype=tf.float32), tf.constant([[1, 0, 0], [0, 0, 1], [0, 1, 0]], dtype=tf.float32), ), ( CategoricalSearchSpace(["R", "G", "B"]), - tf.constant([[[[[0]]]]]), - tf.constant([[[[[1, 0, 0]]]]], dtype=tf.float32), + tf.constant([[[[[0]]]]], dtype=tf.float64), + tf.constant([[[[[1, 0, 0]]]]], dtype=tf.float64), ), ( - CategoricalSearchSpace(["R", "G", "B", "A"]), - tf.constant([[0], [2], [2]]), + CategoricalSearchSpace(["R", "G", "B", "A"], dtype=tf.float32), + tf.constant([[0], [2], [2]], dtype=tf.float32), tf.constant([[1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]], dtype=tf.float32), ), ( CategoricalSearchSpace([["R", "G", "B"], ["Y", "N"]]), - tf.constant([[0, 0], [2, 0], [1, 1]]), - tf.constant([[1, 0, 0, 1, 0], [0, 0, 1, 1, 0], [0, 1, 0, 0, 1]], dtype=tf.float32), + tf.constant([[0, 0], [2, 0], [1, 1]], dtype=tf.float64), + tf.constant([[1, 0, 0, 1, 0], [0, 0, 1, 1, 0], [0, 1, 0, 0, 1]], dtype=tf.float64), ), ( CategoricalSearchSpace([["R", "G", "B"], ["Y", "N"]]), - tf.constant([[[0, 0], [0, 0]], [[2, 0], [1, 1]]]), + tf.constant([[[0, 0], [0, 0]], [[2, 0], [1, 1]]], dtype=tf.float64), tf.constant( [[[1, 0, 0, 1, 0], [1, 0, 0, 1, 0]], [[0, 0, 1, 1, 0], [0, 1, 0, 0, 1]]], - dtype=tf.float32, + dtype=tf.float64, ), ), ( TaggedProductSearchSpace([Box([0.0], [1.0]), CategoricalSearchSpace(["R", "G", "B"])]), - tf.constant([[0.5, 0], [0.3, 2]]), - tf.constant([[0.5, 1, 0, 0], [0.3, 0, 0, 1]], dtype=tf.float32), + tf.constant([[0.5, 0], [0.3, 2]], dtype=tf.float64), + tf.constant([[0.5, 1, 0, 0], [0.3, 0, 0, 1]], dtype=tf.float64), ), ( TaggedProductSearchSpace([Box([0.0], [1.0]), CategoricalSearchSpace(["R", "G", "B"])]), - tf.constant([[[0.5, 0]], [[0.3, 2]]]), - tf.constant([[[0.5, 1, 0, 0]], [[0.3, 0, 0, 1]]], dtype=tf.float32), + tf.constant([[[0.5, 0]], [[0.3, 2]]], dtype=tf.float64), + tf.constant([[[0.5, 1, 0, 0]], [[0.3, 0, 0, 1]]], dtype=tf.float64), ), ( Box([0.0], [1.0]), - tf.constant([[0.5], [0.3]]), - tf.constant([[0.5], [0.3]], dtype=tf.float32), + tf.constant([[0.5], [0.3]], dtype=tf.float64), + tf.constant([[0.5], [0.3]], dtype=tf.float64), ), ], ) diff --git a/trieste/acquisition/optimizer.py b/trieste/acquisition/optimizer.py index 
01c33ece7e..d4ab1600d5 100644 --- a/trieste/acquisition/optimizer.py +++ b/trieste/acquisition/optimizer.py @@ -34,7 +34,7 @@ Box, CollectionSearchSpace, Constraint, - DiscreteSearchSpace, + GeneralDiscreteSearchSpace, SearchSpace, SearchSpaceType, TaggedMultiSearchSpace, @@ -101,7 +101,7 @@ def automatic_optimizer_selector( :return: The batch of points in ``space`` that maximises ``target_func``, with shape [1, D]. """ - if isinstance(space, DiscreteSearchSpace): + if isinstance(space, GeneralDiscreteSearchSpace): return optimize_discrete(space, target_func) elif isinstance(space, (Box, CollectionSearchSpace)): @@ -151,11 +151,11 @@ def _get_max_discrete_points( def optimize_discrete( - space: DiscreteSearchSpace, + space: GeneralDiscreteSearchSpace, target_func: Union[AcquisitionFunction, Tuple[AcquisitionFunction, int]], ) -> TensorType: """ - An :const:`AcquisitionOptimizer` for :class:'DiscreteSearchSpace' spaces. + An :const:`AcquisitionOptimizer` for :class:'GeneralDiscreteSearchSpace' spaces. When this functions receives an acquisition-integer tuple as its `target_func`,it evaluates all the points in the search space for each of the individual V functions making @@ -734,7 +734,7 @@ def get_bounds_of_box_relaxation_around_point( space_with_fixed_discrete = space for tag in space.subspace_tags: if isinstance( - space.get_subspace(tag), DiscreteSearchSpace + space.get_subspace(tag), GeneralDiscreteSearchSpace ): # convert discrete subspaces to box spaces. subspace_value = space.get_subspace_component(tag, current_point) space_with_fixed_discrete = space_with_fixed_discrete.fix_subspace(tag, subspace_value) diff --git a/trieste/models/gpflow/builders.py b/trieste/models/gpflow/builders.py index 638ae55b9d..813cef6bdf 100644 --- a/trieste/models/gpflow/builders.py +++ b/trieste/models/gpflow/builders.py @@ -21,7 +21,7 @@ from __future__ import annotations import math -from typing import Optional, Sequence, Type +from typing import Callable, Optional, Sequence, Type import gpflow import tensorflow as tf @@ -30,9 +30,10 @@ from gpflow.models import GPR, SGPR, SVGP, VGP, GPModel from ...data import Dataset, split_dataset_by_fidelity -from ...space import Box, SearchSpace +from ...space import Box, EncoderFunction, SearchSpace, one_hot_encoded_space, one_hot_encoder from ...types import TensorType from ..gpflow.models import GaussianProcessRegression +from ..interfaces import encode_dataset # NOTE: As a static non-Tensor, this should really be a tf.constant (like the other constants). # However, changing it breaks serialisation during the expected_improvement.pct.py notebook. @@ -88,6 +89,8 @@ def build_gpr( likelihood_variance: Optional[float] = None, trainable_likelihood: bool = False, kernel: Optional[gpflow.kernels.Kernel] = None, + encoder: EncoderFunction | None = None, + space_encoder: Callable[[SearchSpace], SearchSpace] | None = None, ) -> GPR: """ Build a :class:`~gpflow.models.GPR` model with sensible initial parameters and @@ -118,8 +121,20 @@ def build_gpr( non-trainable. By default set to `False`. :param kernel: The kernel to use in the model, defaults to letting the function set up a :class:`~gpflow.kernels.Matern52` kernel. + :param encoder: Encoder with which to transform the dataset before training. Defaults to + one_hot_encoder if the search_space is specified. + :param space_encoder: Encoder with which to transform search_space before generating a kernel. + Defaults to one_hot_encoded_space. :return: A :class:`~gpflow.models.GPR` model. 
""" + if search_space is not None: + encoder = one_hot_encoder(search_space) if encoder is None else encoder + space_encoder = one_hot_encoded_space if space_encoder is None else space_encoder + search_space = space_encoder(search_space) + + if encoder is not None: + data = encode_dataset(data, encoder) + empirical_mean, empirical_variance, _ = _get_data_stats(data) if kernel is None: diff --git a/trieste/models/gpflow/interface.py b/trieste/models/gpflow/interface.py index 5fad89f679..9448333365 100644 --- a/trieste/models/gpflow/interface.py +++ b/trieste/models/gpflow/interface.py @@ -19,22 +19,24 @@ import gpflow import tensorflow as tf -from check_shapes import inherit_check_shapes from gpflow.models import GPModel from gpflow.posteriors import BasePosterior, PrecomputeCacheType -from typing_extensions import Protocol +from typing_extensions import Protocol, final from ... import logging from ...data import Dataset +from ...space import EncoderFunction from ...types import TensorType from ..interfaces import ( + EncodedProbabilisticModel, + EncodedSupportsPredictJoint, + EncodedSupportsPredictY, + EncodedTrainableProbabilisticModel, HasReparamSampler, ReparametrizationSampler, SupportsGetKernel, SupportsGetObservationNoise, SupportsPredictJoint, - SupportsPredictY, - TrainableProbabilisticModel, ) from ..optimizer import Optimizer from ..utils import ( @@ -46,27 +48,39 @@ class GPflowPredictor( - SupportsPredictJoint, + EncodedSupportsPredictJoint, SupportsGetKernel, SupportsGetObservationNoise, - SupportsPredictY, + EncodedSupportsPredictY, HasReparamSampler, - TrainableProbabilisticModel, + EncodedTrainableProbabilisticModel, + EncodedProbabilisticModel, ABC, ): """A trainable wrapper for a GPflow Gaussian process model.""" - def __init__(self, optimizer: Optimizer | None = None): + def __init__(self, optimizer: Optimizer | None = None, encoder: EncoderFunction | None = None): """ :param optimizer: The optimizer with which to train the model. Defaults to :class:`~trieste.models.optimizer.Optimizer` with :class:`~gpflow.optimizers.Scipy`. + :param encoder: Optional encoder with which to transform query points before + generating predictions. 
""" if optimizer is None: optimizer = Optimizer(gpflow.optimizers.Scipy(), compile=True) self._optimizer = optimizer + self._encoder = encoder self._posterior: Optional[BasePosterior] = None + @property + def encoder(self) -> EncoderFunction | None: + return self._encoder + + @encoder.setter + def encoder(self, encoder: EncoderFunction | None) -> None: + self._encoder = encoder + @property def optimizer(self) -> Optimizer: """The optimizer with which to train the model.""" @@ -102,16 +116,14 @@ def update_posterior_cache(self) -> None: def model(self) -> GPModel: """The underlying GPflow model.""" - @inherit_check_shapes - def predict(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + def predict_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: mean, cov = (self._posterior or self.model).predict_f(query_points) # posterior predict can return negative variance values [cf GPFlow issue #1813] if self._posterior is not None: cov = tf.clip_by_value(cov, 1e-12, cov.dtype.max) return mean, cov - @inherit_check_shapes - def predict_joint(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + def predict_joint_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: mean, cov = (self._posterior or self.model).predict_f(query_points, full_cov=True) # posterior predict can return negative variance values [cf GPFlow issue #1813] if self._posterior is not None: @@ -120,12 +132,10 @@ def predict_joint(self, query_points: TensorType) -> tuple[TensorType, TensorTyp ) return mean, cov - @inherit_check_shapes - def sample(self, query_points: TensorType, num_samples: int) -> TensorType: + def sample_encoded(self, query_points: TensorType, num_samples: int) -> TensorType: return self.model.predict_f_samples(query_points, num_samples) - @inherit_check_shapes - def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + def predict_y_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: return self.model.predict_y(query_points) def get_kernel(self) -> gpflow.kernels.Kernel: @@ -206,3 +216,21 @@ def covariance_between_points( (L being the number of latent GPs = number of output dimensions) """ raise NotImplementedError + + +class EncodedSupportsCovarianceBetweenPoints( + EncodedProbabilisticModel, SupportsCovarianceBetweenPoints +): + @abstractmethod + def covariance_between_points_encoded( + self, query_points_1: TensorType, query_points_2: TensorType + ) -> TensorType: + """Implementation of covariance_between_points on encoded query points.""" + + @final + def covariance_between_points( + self, query_points_1: TensorType, query_points_2: TensorType + ) -> TensorType: + return self.covariance_between_points_encoded( + self.encode(query_points_1), self.encode(query_points_2) + ) diff --git a/trieste/models/gpflow/models.py b/trieste/models/gpflow/models.py index 235325e399..51a29e71ba 100644 --- a/trieste/models/gpflow/models.py +++ b/trieste/models/gpflow/models.py @@ -37,11 +37,12 @@ check_and_extract_fidelity_query_points, split_dataset_by_fidelity, ) +from ...space import EncoderFunction from ...types import TensorType from ...utils import DEFAULTS, jit from ...utils.misc import flatten_leading_dims from ..interfaces import ( - FastUpdateModel, + EncodedFastUpdateModel, HasTrajectorySampler, SupportsCovarianceWithTopFidelity, SupportsGetInducingVariables, @@ -52,7 +53,7 @@ ) from ..optimizer import BatchOptimizer, Optimizer, OptimizeResult from .inducing_point_selectors import 
InducingPointSelector -from .interface import GPflowPredictor, SupportsCovarianceBetweenPoints +from .interface import EncodedSupportsCovarianceBetweenPoints, GPflowPredictor from .sampler import DecoupledTrajectorySampler, RandomFourierFeatureTrajectorySampler from .utils import ( _covariance_between_points_for_variational_models, @@ -66,8 +67,8 @@ class GaussianProcessRegression( GPflowPredictor, - FastUpdateModel, - SupportsCovarianceBetweenPoints, + EncodedFastUpdateModel, + EncodedSupportsCovarianceBetweenPoints, SupportsGetInternalData, HasTrajectorySampler, ): @@ -90,6 +91,7 @@ def __init__( num_kernel_samples: int = 10, num_rff_features: int = 1000, use_decoupled_sampler: bool = True, + encoder: EncoderFunction | None = None, ): """ :param model: The GPflow model to wrap. @@ -105,8 +107,10 @@ def __init__( :param use_decoupled_sampler: If True use a decoupled random Fourier feature sampler, else just use a random Fourier feature sampler. The decoupled sampler suffers less from overestimating variance and can typically get away with a lower num_rff_features. + :param encoder: Optional encoder with which to transform query points before + generating predictions. """ - super().__init__(optimizer) + super().__init__(optimizer, encoder) self._model = model check_optimizer(self.optimizer) @@ -159,12 +163,11 @@ def _ensure_variable_model_data(self) -> None: ), ) - @inherit_check_shapes - def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: - f_mean, f_var = self.predict(query_points) + def predict_y_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + f_mean, f_var = self.predict_encoded(query_points) return self.model.likelihood.predict_mean_and_var(query_points, f_mean, f_var) - def update(self, dataset: Dataset) -> None: + def update_encoded(self, dataset: Dataset) -> None: self._ensure_variable_model_data() x, y = self.model.data[0].value(), self.model.data[1].value() @@ -181,7 +184,7 @@ def update(self, dataset: Dataset) -> None: self.model.data[1].assign(dataset.observations) self.update_posterior_cache() - def covariance_between_points( + def covariance_between_points_encoded( self, query_points_1: TensorType, query_points_2: TensorType ) -> TensorType: r""" @@ -249,7 +252,7 @@ def covariance_between_points( return cov - def optimize(self, dataset: Dataset) -> OptimizeResult: + def optimize_encoded(self, dataset: Dataset) -> OptimizeResult: """ Optimize the model with the specified `dataset`. @@ -269,7 +272,6 @@ def optimize(self, dataset: Dataset) -> OptimizeResult: :param dataset: The data with which to optimize the `model`. 
""" - num_trainable_params_with_priors_or_constraints = tf.reduce_sum( [ tf.size(param) @@ -349,7 +351,7 @@ def get_internal_data(self) -> Dataset: """ return Dataset(self.model.data[0], self.model.data[1]) - def conditional_predict_f( + def conditional_predict_f_encoded( self, query_points: TensorType, additional_data: Dataset ) -> tuple[TensorType, TensorType]: """ @@ -374,10 +376,10 @@ def conditional_predict_f( "should have shape [M, D]", ) - mean_add, cov_add = self.predict_joint( + mean_add, cov_add = self.predict_joint_encoded( additional_data.query_points ) # [..., N, L], [..., L, N, N] - mean_qp, var_qp = self.predict(query_points) # [M, L], [M, L] + mean_qp, var_qp = self.predict_encoded(query_points) # [M, L], [M, L] cov_cross = self.covariance_between_points( additional_data.query_points, query_points @@ -414,7 +416,7 @@ def conditional_predict_f( return mean_qp_new, var_qp_new - def conditional_predict_joint( + def conditional_predict_joint_encoded( self, query_points: TensorType, additional_data: Dataset ) -> tuple[TensorType, TensorType]: """ @@ -445,7 +447,7 @@ def conditional_predict_joint( query_points_r = tf.broadcast_to(query_points, new_shape) # [..., M, D] points = tf.concat([additional_data.query_points, query_points_r], axis=-2) # [..., N+M, D] - mean, cov = self.predict_joint(points) # [..., N+M, L], [..., L, N+M, N+M] + mean, cov = self.predict_joint_encoded(points) # [..., N+M, L], [..., L, N+M, N+M] N = tf.shape(additional_data.query_points)[-2] @@ -484,7 +486,7 @@ def conditional_predict_joint( return mean_qp_new, cov_qp_new - def conditional_predict_f_sample( + def conditional_predict_f_sample_encoded( self, query_points: TensorType, additional_data: Dataset, num_samples: int ) -> TensorType: """ @@ -505,7 +507,7 @@ def conditional_predict_f_sample( ) # [..., (S), P, N] return tf.linalg.adjoint(samples) # [..., (S), N, L] - def conditional_predict_y( + def conditional_predict_y_encoded( self, query_points: TensorType, additional_data: Dataset ) -> tuple[TensorType, TensorType]: """ @@ -524,7 +526,7 @@ def conditional_predict_y( class SparseGaussianProcessRegression( GPflowPredictor, - SupportsCovarianceBetweenPoints, + EncodedSupportsCovarianceBetweenPoints, SupportsGetInducingVariables, SupportsGetInternalData, HasTrajectorySampler, @@ -551,6 +553,7 @@ def __init__( inducing_point_selector: Optional[ InducingPointSelector[SparseGaussianProcessRegression] ] = None, + encoder: EncoderFunction | None = None, ): """ :param model: The GPflow model to wrap. @@ -566,8 +569,10 @@ def __init__( :raise NotImplementedError (or ValueError): If we try to use a model with invalid ``num_rff_features``, or an ``inducing_point_selector`` with a model that has more than one set of inducing points. + :param encoder: Optional encoder with which to transform query points before + generating predictions. 
""" - super().__init__(optimizer) + super().__init__(optimizer, encoder) self._model = model check_optimizer(self.optimizer) @@ -609,9 +614,8 @@ def inducing_point_selector( ) -> Optional[InducingPointSelector[SparseGaussianProcessRegression]]: return self._inducing_point_selector - @inherit_check_shapes - def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: - f_mean, f_var = self.predict(query_points) + def predict_y_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + f_mean, f_var = self.predict_encoded(query_points) return self.model.likelihood.predict_mean_and_var(query_points, f_mean, f_var) def _ensure_variable_model_data(self) -> None: @@ -637,7 +641,7 @@ def _ensure_variable_model_data(self) -> None: if not is_variable(self._model.num_data): self._model.num_data = tf.Variable(self._model.num_data, trainable=False) - def optimize(self, dataset: Dataset) -> OptimizeResult: + def optimize_encoded(self, dataset: Dataset) -> OptimizeResult: """ Optimize the model with the specified `dataset`. @@ -647,7 +651,7 @@ def optimize(self, dataset: Dataset) -> OptimizeResult: self.update_posterior_cache() return result - def update(self, dataset: Dataset) -> None: + def update_encoded(self, dataset: Dataset) -> None: self._ensure_variable_model_data() x, y = self.model.data[0].value(), self.model.data[1].value() @@ -779,7 +783,7 @@ def get_inducing_variables( return inducing_points, q_mu, q_sqrt, whiten - def covariance_between_points( + def covariance_between_points_encoded( self, query_points_1: TensorType, query_points_2: TensorType ) -> TensorType: r""" @@ -837,7 +841,7 @@ def get_internal_data(self) -> Dataset: class SparseVariational( GPflowPredictor, - SupportsCovarianceBetweenPoints, + EncodedSupportsCovarianceBetweenPoints, SupportsGetInducingVariables, HasTrajectorySampler, ): @@ -858,6 +862,7 @@ def __init__( optimizer: Optimizer | None = None, num_rff_features: int = 1000, inducing_point_selector: Optional[InducingPointSelector[SparseVariational]] = None, + encoder: EncoderFunction | None = None, ): """ :param model: The underlying GPflow sparse variational model. @@ -874,6 +879,8 @@ def __init__( the optimization progresses. :raise NotImplementedError: If we try to use an inducing_point_selector with a model that has more than one set of inducing points. + :param encoder: Optional encoder with which to transform query points before + generating predictions. 
""" tf.debugging.assert_rank( @@ -883,7 +890,7 @@ def __init__( if optimizer is None: optimizer = BatchOptimizer(tf.optimizers.Adam(), batch_size=100, compile=True) - super().__init__(optimizer) + super().__init__(optimizer, encoder) self._model = model if num_rff_features <= 0: @@ -932,12 +939,11 @@ def model(self) -> SVGP: def inducing_point_selector(self) -> Optional[InducingPointSelector[SparseVariational]]: return self._inducing_point_selector - @inherit_check_shapes - def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: - f_mean, f_var = self.predict(query_points) + def predict_y_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + f_mean, f_var = self.predict_encoded(query_points) return self.model.likelihood.predict_mean_and_var(query_points, f_mean, f_var) - def update(self, dataset: Dataset) -> None: + def update_encoded(self, dataset: Dataset) -> None: self._ensure_variable_model_data() # Hard-code asserts from _assert_data_is_compatible because model doesn't store dataset @@ -979,7 +985,7 @@ def update(self, dataset: Dataset) -> None: self._update_inducing_variables(new_inducing_points) self.update_posterior_cache() - def optimize(self, dataset: Dataset) -> OptimizeResult: + def optimize_encoded(self, dataset: Dataset) -> OptimizeResult: """ Optimize the model with the specified `dataset`. @@ -1017,7 +1023,9 @@ def _update_inducing_variables(self, new_inducing_points: TensorType) -> None: if whiten: new_q_mu, new_q_sqrt = _whiten_points(self, new_inducing_points) else: - new_q_mu, new_f_cov = self.predict_joint(new_inducing_points) # [N, L], [L, N, N] + new_q_mu, new_f_cov = self.predict_joint_encoded( + new_inducing_points + ) # [N, L], [L, N, N] new_q_mu -= self.model.mean_function(new_inducing_points) jitter_mat = DEFAULTS.JITTER * tf.eye( tf.shape(new_inducing_points)[0], dtype=new_f_cov.dtype @@ -1062,7 +1070,7 @@ def get_inducing_variables( return inducing_points, self.model.q_mu, self.model.q_sqrt, self.model.whiten - def covariance_between_points( + def covariance_between_points_encoded( self, query_points_1: TensorType, query_points_2: TensorType ) -> TensorType: r""" @@ -1099,7 +1107,7 @@ def trajectory_sampler(self) -> TrajectorySampler[SparseVariational]: class VariationalGaussianProcess( GPflowPredictor, - SupportsCovarianceBetweenPoints, + EncodedSupportsCovarianceBetweenPoints, SupportsGetInducingVariables, HasTrajectorySampler, ): @@ -1132,6 +1140,7 @@ def __init__( use_natgrads: bool = False, natgrad_gamma: Optional[float] = None, num_rff_features: int = 1000, + encoder: EncoderFunction | None = None, ): """ :param model: The GPflow :class:`~gpflow.models.VGP`. @@ -1150,6 +1159,8 @@ def __init__( :raise ValueError (or InvalidArgumentError): If ``model``'s :attr:`q_sqrt` is not rank 3 or if attempting to combine natural gradients with a :class:`~gpflow.optimizers.Scipy` optimizer. + :param encoder: Optional encoder with which to transform query points before + generating predictions. 
""" tf.debugging.assert_rank(model.q_sqrt, 3) @@ -1158,7 +1169,7 @@ def __init__( elif optimizer is None and use_natgrads: optimizer = BatchOptimizer(tf.optimizers.Adam(), batch_size=100, compile=True) - super().__init__(optimizer) + super().__init__(optimizer, encoder) check_optimizer(self.optimizer) @@ -1245,12 +1256,11 @@ def __repr__(self) -> str: def model(self) -> VGP: return self._model - @inherit_check_shapes - def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: - f_mean, f_var = self.predict(query_points) + def predict_y_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + f_mean, f_var = self.predict_encoded(query_points) return self.model.likelihood.predict_mean_and_var(query_points, f_mean, f_var) - def update(self, dataset: Dataset, *, jitter: float = DEFAULTS.JITTER) -> None: + def update_encoded(self, dataset: Dataset, *, jitter: float = DEFAULTS.JITTER) -> None: """ Update the model given the specified ``dataset``. Does not train the model. @@ -1262,7 +1272,7 @@ def update(self, dataset: Dataset, *, jitter: float = DEFAULTS.JITTER) -> None: update_vgp_data(self.model, (dataset.query_points, dataset.observations)) self.update_posterior_cache() - def optimize(self, dataset: Dataset) -> Optional[OptimizeResult]: + def optimize_encoded(self, dataset: Dataset) -> Optional[OptimizeResult]: """ :class:`VariationalGaussianProcess` has a custom `optimize` method that (optionally) permits alternating between standard optimization steps (for kernel parameters) and natural gradient @@ -1343,7 +1353,7 @@ def trajectory_sampler(self) -> TrajectorySampler[VariationalGaussianProcess]: return DecoupledTrajectorySampler(self, self._num_rff_features) - def covariance_between_points( + def covariance_between_points_encoded( self, query_points_1: TensorType, query_points_2: TensorType ) -> TensorType: r""" diff --git a/trieste/models/interfaces.py b/trieste/models/interfaces.py index ae265d4f85..223408ce1d 100644 --- a/trieste/models/interfaces.py +++ b/trieste/models/interfaces.py @@ -15,14 +15,15 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, Callable, Generic, Optional, Sequence, TypeVar +from typing import Any, Callable, Generic, Optional, Sequence, TypeVar, overload import gpflow import tensorflow as tf -from check_shapes import check_shapes -from typing_extensions import Protocol, runtime_checkable +from check_shapes import check_shapes, inherit_check_shapes +from typing_extensions import Protocol, final, runtime_checkable from ..data import Dataset +from ..space import EncoderFunction from ..types import TensorType from ..utils import DEFAULTS @@ -742,3 +743,164 @@ def covariance_with_top_fidelity(self, query_points: TensorType) -> TensorType: :return: The covariance with the top fidelity for the `query_points`, of shape [N, P] """ raise NotImplementedError + + +def encode_dataset(dataset: Dataset, encoder: EncoderFunction) -> Dataset: + """Return a new Dataset with the query points encoded using the given encoder.""" + return Dataset(encoder(dataset.query_points), dataset.observations) + + +class EncodedProbabilisticModel(ProbabilisticModel): + """A probabilistic model with an associated query point encoder. + + Classes that inherit from this (or the other associated mixins below) should implement the + relevant _encoded methods (e.g. predict_encoded instead of predict), to which the public + methods delegate after encoding their input. 
Take care to use the correct methods internally + to avoid encoding twice accidentally. + """ + + @property + @abstractmethod + def encoder(self) -> EncoderFunction | None: + """Query point encoder.""" + + @overload + def encode(self, points: TensorType) -> TensorType: + ... + + @overload + def encode(self, points: Dataset) -> Dataset: + ... + + def encode(self, points: Dataset | TensorType) -> Dataset | TensorType: + """Encode points or a Dataset using the query point encoder.""" + if self.encoder is None: + return points + elif isinstance(points, Dataset): + return encode_dataset(points, self.encoder) + else: + return self.encoder(points) + + @abstractmethod + def predict_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + """Implementation of predict on encoded query points.""" + + @abstractmethod + def sample_encoded(self, query_points: TensorType, num_samples: int) -> TensorType: + """Implementation of sample on encoded query points.""" + + @final + @inherit_check_shapes + def predict(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + return self.predict_encoded(self.encode(query_points)) + + @final + @inherit_check_shapes + def sample(self, query_points: TensorType, num_samples: int) -> TensorType: + return self.sample_encoded(self.encode(query_points), num_samples) + + +class EncodedTrainableProbabilisticModel(EncodedProbabilisticModel, TrainableProbabilisticModel): + """A trainable probabilistic model with an associated query point encoder.""" + + @abstractmethod + def update_encoded(self, dataset: Dataset) -> None: + """Implementation of update on the encoded dataset.""" + + @abstractmethod + def optimize_encoded(self, dataset: Dataset) -> Any: + """Implementation of optimize on the encoded dataset.""" + + @final + def update(self, dataset: Dataset) -> None: + return self.update_encoded(self.encode(dataset)) + + @final + def optimize(self, dataset: Dataset) -> Any: + return self.optimize_encoded(self.encode(dataset)) + + +class EncodedSupportsPredictJoint(EncodedProbabilisticModel, SupportsPredictJoint): + """A probabilistic model that supports predict_joint with an associated query point encoder.""" + + @abstractmethod + def predict_joint_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + """Implementation of predict_joint on encoded query points.""" + + @final + @inherit_check_shapes + def predict_joint(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + return self.predict_joint_encoded(self.encode(query_points)) + + +class EncodedSupportsPredictY(EncodedProbabilisticModel, SupportsPredictY): + """A probabilistic model that supports predict_y with an associated query point encoder.""" + + @abstractmethod + def predict_y_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + """Implementation of predict_y on encoded query points.""" + + @final + @inherit_check_shapes + def predict_y(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + return self.predict_y_encoded(self.encode(query_points)) + + +class EncodedFastUpdateModel(EncodedProbabilisticModel, FastUpdateModel): + """A fast update model with an associated query point encoder.""" + + @abstractmethod + def conditional_predict_f_encoded( + self, query_points: TensorType, additional_data: Dataset + ) -> tuple[TensorType, TensorType]: + """Implementation of conditional_predict_f on encoded query points.""" + + @abstractmethod + def conditional_predict_joint_encoded( + self, query_points: TensorType, 
additional_data: Dataset + ) -> tuple[TensorType, TensorType]: + """Implementation of conditional_predict_joint on encoded query points.""" + + @abstractmethod + def conditional_predict_f_sample_encoded( + self, query_points: TensorType, additional_data: Dataset, num_samples: int + ) -> TensorType: + """Implementation of conditional_predict_f_sample on encoded query points.""" + + @abstractmethod + def conditional_predict_y_encoded( + self, query_points: TensorType, additional_data: Dataset + ) -> tuple[TensorType, TensorType]: + """Implementation of conditional_predict_y on encoded query points.""" + + @final + def conditional_predict_f( + self, query_points: TensorType, additional_data: Dataset + ) -> tuple[TensorType, TensorType]: + return self.conditional_predict_f_encoded( + self.encode(query_points), self.encode(additional_data) + ) + + @final + def conditional_predict_joint( + self, query_points: TensorType, additional_data: Dataset + ) -> tuple[TensorType, TensorType]: + return self.conditional_predict_joint_encoded( + self.encode(query_points), self.encode(additional_data) + ) + + @final + def conditional_predict_f_sample( + self, query_points: TensorType, additional_data: Dataset, num_samples: int + ) -> TensorType: + return self.conditional_predict_f_sample_encoded( + self.encode(query_points), self.encode(additional_data), num_samples + ) + + @final + def conditional_predict_y( + self, query_points: TensorType, additional_data: Dataset + ) -> tuple[TensorType, TensorType]: + return self.conditional_predict_y_encoded( + self.encode(query_points), self.encode(additional_data) + ) diff --git a/trieste/models/keras/interface.py b/trieste/models/keras/interface.py index 18a4afbc48..6868193aa7 100644 --- a/trieste/models/keras/interface.py +++ b/trieste/models/keras/interface.py @@ -19,31 +19,38 @@ import tensorflow as tf import tensorflow_probability as tfp -from check_shapes import inherit_check_shapes from typing_extensions import Protocol, runtime_checkable from ...data import Dataset +from ...space import EncoderFunction from ...types import TensorType -from ..interfaces import ProbabilisticModel +from ..interfaces import EncodedProbabilisticModel, ProbabilisticModel from ..optimizer import KerasOptimizer -class KerasPredictor(ProbabilisticModel, ABC): +class KerasPredictor(EncodedProbabilisticModel, ABC): """ This is an interface for trainable wrappers of TensorFlow and Keras neural network models. """ - def __init__(self, optimizer: Optional[KerasOptimizer] = None): + def __init__( + self, + optimizer: Optional[KerasOptimizer] = None, + encoder: EncoderFunction | None = None, + ): """ :param optimizer: The optimizer wrapper containing the optimizer with which to train the model and arguments for the wrapper and the optimizer. The optimizer must be an instance of a :class:`~tf.optimizers.Optimizer`. Defaults to :class:`~tf.optimizers.Adam` optimizer with default parameters. + :param encoder: Optional encoder with which to transform query points before + generating predictions. :raise ValueError: If the optimizer is not an instance of :class:`~tf.optimizers.Optimizer`. 
""" if optimizer is None: optimizer = KerasOptimizer(tf.optimizers.Adam()) self._optimizer = optimizer + self._encoder = encoder if not isinstance(optimizer.optimizer, tf.optimizers.Optimizer): raise ValueError( @@ -62,12 +69,18 @@ def optimizer(self) -> KerasOptimizer: """The optimizer wrapper for training the model.""" return self._optimizer - @inherit_check_shapes - def predict(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + @property + def encoder(self) -> EncoderFunction | None: + return self._encoder + + @encoder.setter + def encoder(self, encoder: EncoderFunction | None) -> None: + self._encoder = encoder + + def predict_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: return self.model.predict(query_points) - @inherit_check_shapes - def sample(self, query_points: TensorType, num_samples: int) -> TensorType: + def sample_encoded(self, query_points: TensorType, num_samples: int) -> TensorType: raise NotImplementedError( """ KerasPredictor does not implement sampling. Acquisition diff --git a/trieste/models/keras/models.py b/trieste/models/keras/models.py index 10e27acc6f..3e6cabc9c9 100644 --- a/trieste/models/keras/models.py +++ b/trieste/models/keras/models.py @@ -22,14 +22,14 @@ import tensorflow as tf import tensorflow_probability as tfp import tensorflow_probability.python.distributions as tfd -from check_shapes import inherit_check_shapes from tensorflow.python.keras.callbacks import Callback from ... import logging from ...data import Dataset +from ...space import EncoderFunction from ...types import TensorType from ...utils import flatten_leading_dims -from ..interfaces import HasTrajectorySampler, TrainableProbabilisticModel, TrajectorySampler +from ..interfaces import EncodedTrainableProbabilisticModel, HasTrajectorySampler, TrajectorySampler from ..optimizer import KerasOptimizer from ..utils import write_summary_data_based_metrics from .architectures import KerasEnsemble, MultivariateNormalTriL @@ -39,7 +39,10 @@ class DeepEnsemble( - KerasPredictor, TrainableProbabilisticModel, DeepEnsembleModel, HasTrajectorySampler + KerasPredictor, + EncodedTrainableProbabilisticModel, + DeepEnsembleModel, + HasTrajectorySampler, ): """ A :class:`~trieste.model.TrainableProbabilisticModel` wrapper for deep ensembles built using @@ -75,7 +78,7 @@ class DeepEnsemble( behaviour you would like, you will need to subclass the model and overwrite the :meth:`optimize` method. - Currently we do not support setting up the model with dictionary config. + Currently, we do not support setting up the model with dictionary config. """ def __init__( @@ -86,6 +89,7 @@ def __init__( diversify: bool = False, continuous_optimisation: bool = True, compile_args: Optional[Mapping[str, Any]] = None, + encoder: EncoderFunction | None = None, ) -> None: """ :param model: A Keras ensemble model with probabilistic networks as ensemble members. The @@ -98,12 +102,12 @@ def __init__( See https://keras.io/api/models/model_training_apis/#fit-method for a list of possible arguments. :param bootstrap: Sample with replacement data for training each network in the ensemble. - By default set to `False`. + By default, set to `False`. :param diversify: Whether to use quantiles from the approximate Gaussian distribution of the ensemble as trajectories instead of mean predictions when calling :meth:`trajectory_sampler`. This mode can be used to increase the diversity in case of optimizing very large batches of trajectories. By - default set to `False`. + default, set to `False`. 
:param continuous_optimisation: If True (default), the optimizer will keep track of the number of epochs across BO iterations and use this number as initial_epoch. This is essential to allow monitoring of model training across BO iterations. @@ -112,6 +116,8 @@ def __init__( See https://keras.io/api/models/model_training_apis/#compile-method for a list of possible arguments. The ``optimizer``, ``loss`` and ``metrics`` arguments must not be included. + :param encoder: Optional encoder with which to transform query points before + generating predictions. :raise ValueError: If ``model`` is not an instance of :class:`~trieste.models.keras.KerasEnsemble`, or ensemble has less than two base learners (networks), or `compile_args` contains disallowed arguments. @@ -119,7 +125,7 @@ def __init__( if model.ensemble_size < 2: raise ValueError(f"Ensemble size must be greater than 1 but got {model.ensemble_size}.") - super().__init__(optimizer) + super().__init__(optimizer, encoder) if compile_args is None: compile_args = {} @@ -244,8 +250,7 @@ def ensemble_distributions(self, query_points: TensorType) -> tuple[tfd.Distribu x_transformed: dict[str, TensorType] = self.prepare_query_points(query_points) return self._model.model(x_transformed) - @inherit_check_shapes - def predict(self, query_points: TensorType) -> tuple[TensorType, TensorType]: + def predict_encoded(self, query_points: TensorType) -> tuple[TensorType, TensorType]: r""" Returns mean and variance at ``query_points`` for the whole ensemble. @@ -308,14 +313,13 @@ def predict_ensemble(self, query_points: TensorType) -> tuple[TensorType, Tensor :return: The predicted mean and variance of the observations at the specified ``query_points`` for each member of the ensemble. """ - ensemble_distributions = self.ensemble_distributions(query_points) + ensemble_distributions = self.ensemble_distributions(self.encode(query_points)) predicted_means = tf.convert_to_tensor([dist.mean() for dist in ensemble_distributions]) predicted_vars = tf.convert_to_tensor([dist.variance() for dist in ensemble_distributions]) return predicted_means, predicted_vars - @inherit_check_shapes - def sample(self, query_points: TensorType, num_samples: int) -> TensorType: + def sample_encoded(self, query_points: TensorType, num_samples: int) -> TensorType: """ Return ``num_samples`` samples at ``query_points``. We use the mixture approximation in :meth:`predict` for ``query_points`` and sample ``num_samples`` times from a Gaussian @@ -327,7 +331,7 @@ def sample(self, query_points: TensorType, num_samples: int) -> TensorType: [..., S, N] + E, where S is the number of samples. """ - predicted_means, predicted_vars = self.predict(query_points) + predicted_means, predicted_vars = self.predict_encoded(query_points) normal = tfp.distributions.Normal(predicted_means, tf.sqrt(predicted_vars)) samples = normal.sample(num_samples) @@ -345,7 +349,7 @@ def sample_ensemble(self, query_points: TensorType, num_samples: int) -> TensorT :return: The samples. For a predictive distribution with event shape E, this has shape [..., S, N] + E, where S is the number of samples. 
""" - ensemble_distributions = self.ensemble_distributions(query_points) + ensemble_distributions = self.ensemble_distributions(self.encode(query_points)) network_indices = sample_model_index(self.ensemble_size, num_samples) stacked_samples = [] @@ -365,7 +369,7 @@ def trajectory_sampler(self) -> TrajectorySampler[DeepEnsemble]: """ return DeepEnsembleTrajectorySampler(self, self._diversify) - def update(self, dataset: Dataset) -> None: + def update_encoded(self, dataset: Dataset) -> None: """ Neural networks are parametric models and do not need to update data. `TrainableProbabilisticModel` interface, however, requires an update method, so @@ -373,7 +377,7 @@ def update(self, dataset: Dataset) -> None: """ return - def optimize(self, dataset: Dataset) -> keras.callbacks.History: + def optimize_encoded(self, dataset: Dataset) -> keras.callbacks.History: """ Optimize the underlying Keras ensemble model with the specified ``dataset``. diff --git a/trieste/objectives/multi_objectives.py b/trieste/objectives/multi_objectives.py index f8708831a8..4a4a9bf04b 100644 --- a/trieste/objectives/multi_objectives.py +++ b/trieste/objectives/multi_objectives.py @@ -24,7 +24,7 @@ from check_shapes import check_shape, check_shapes from typing_extensions import Protocol -from ..space import Box +from ..space import Box, SearchSpaceType from ..types import TensorType from .single_objectives import ObjectiveTestProblem @@ -44,7 +44,7 @@ def __call__(self, n: int, seed: int | None = None) -> TensorType: @dataclass(frozen=True) -class MultiObjectiveTestProblem(ObjectiveTestProblem): +class MultiObjectiveTestProblem(ObjectiveTestProblem[SearchSpaceType]): """ Convenience container class for synthetic multi-objective test functions, containing a generator for the pareto optimal points, which can be used as a reference of performance @@ -73,7 +73,7 @@ def vlmop2(x: TensorType, d: int) -> TensorType: return tf.stack([y1, y2], axis=-1) -def VLMOP2(input_dim: int) -> MultiObjectiveTestProblem: +def VLMOP2(input_dim: int) -> MultiObjectiveTestProblem[Box]: """ The VLMOP2 problem, typically evaluated over :math:`[-2, 2]^d`. The idea pareto fronts lies on -1/sqrt(d) - 1/sqrt(d) and x1=...=xdim. @@ -152,7 +152,7 @@ def g(xM: TensorType) -> TensorType: ) -def DTLZ1(input_dim: int, num_objective: int) -> MultiObjectiveTestProblem: +def DTLZ1(input_dim: int, num_objective: int) -> MultiObjectiveTestProblem[Box]: """ The DTLZ1 problem, the idea pareto fronts lie on a linear hyper-plane. See :cite:`deb2002scalable` for details. @@ -212,7 +212,7 @@ def g(xM: TensorType) -> TensorType: ) -def DTLZ2(input_dim: int, num_objective: int) -> MultiObjectiveTestProblem: +def DTLZ2(input_dim: int, num_objective: int) -> MultiObjectiveTestProblem[Box]: """ The DTLZ2 problem, the idea pareto fronts lie on (part of) a unit hyper sphere. See :cite:`deb2002scalable` for details. 
diff --git a/trieste/objectives/multifidelity_objectives.py b/trieste/objectives/multifidelity_objectives.py
index 4435f57d83..aa5c37942e 100644
--- a/trieste/objectives/multifidelity_objectives.py
+++ b/trieste/objectives/multifidelity_objectives.py
@@ -19,13 +19,13 @@
 import numpy as np
 import tensorflow as tf
 
-from ..space import Box, DiscreteSearchSpace, SearchSpace, TaggedProductSearchSpace
+from ..space import Box, DiscreteSearchSpace, SearchSpace, SearchSpaceType, TaggedProductSearchSpace
 from ..types import TensorType
 from .single_objectives import SingleObjectiveTestProblem
 
 
 @dataclass(frozen=True)
-class SingleObjectiveMultifidelityTestProblem(SingleObjectiveTestProblem):
+class SingleObjectiveMultifidelityTestProblem(SingleObjectiveTestProblem[SearchSpaceType]):
     num_fidelities: int
     """The number of fidelities of the test function"""
diff --git a/trieste/objectives/single_objectives.py b/trieste/objectives/single_objectives.py
index c0fd377403..a15ab70808 100644
--- a/trieste/objectives/single_objectives.py
+++ b/trieste/objectives/single_objectives.py
@@ -23,12 +23,12 @@
 import math
 from dataclasses import dataclass
 from math import pi
-from typing import Callable, Sequence
+from typing import Callable, Generic, Sequence
 
 import tensorflow as tf
 from check_shapes import check_shapes
 
-from ..space import Box, Constraint, LinearConstraint, NonlinearConstraint
+from ..space import Box, Constraint, LinearConstraint, NonlinearConstraint, SearchSpaceType
 from ..types import TensorType
 
 ObjectiveTestFunction = Callable[[TensorType], TensorType]
@@ -36,7 +36,7 @@
 
 
 @dataclass(frozen=True)
-class ObjectiveTestProblem:
+class ObjectiveTestProblem(Generic[SearchSpaceType]):
     """
     Convenience container class for synthetic objective test functions.
     """
@@ -47,7 +47,7 @@ class ObjectiveTestProblem:
     objective: ObjectiveTestFunction
     """The synthetic test function"""
 
-    search_space: Box
+    search_space: SearchSpaceType
     """The search space of the test function"""
 
     @property
@@ -62,7 +62,7 @@ def bounds(self) -> list[list[float]]:
 
 
 @dataclass(frozen=True)
-class SingleObjectiveTestProblem(ObjectiveTestProblem):
+class SingleObjectiveTestProblem(ObjectiveTestProblem[SearchSpaceType]):
     """
     Convenience container class for synthetic single-objective test functions, including
     the global minimizers and minimum.
diff --git a/trieste/space.py b/trieste/space.py
index 326cc052b5..4a228460cf 100644
--- a/trieste/space.py
+++ b/trieste/space.py
@@ -518,6 +518,20 @@ def one_hot_encoder(space: SearchSpace) -> EncoderFunction:
     return space.one_hot_encoder if isinstance(space, HasOneHotEncoder) else lambda x: x
 
 
+def one_hot_encoded_space(space: SearchSpace) -> SearchSpace:
+    "A bounded search space corresponding to the one-hot encoding of the given space."
+
+    if isinstance(space, GeneralDiscreteSearchSpace) and isinstance(space, HasOneHotEncoder):
+        return DiscreteSearchSpace(space.one_hot_encoder(space.points))
+    elif isinstance(space, TaggedProductSearchSpace):
+        spaces = [one_hot_encoded_space(space.get_subspace(tag)) for tag in space.subspace_tags]
+        return TaggedProductSearchSpace(spaces=spaces, tags=space.subspace_tags)
+    elif isinstance(space, HasOneHotEncoder):
+        raise NotImplementedError(f"Unsupported one-hot-encoded space {type(space)}")
+    else:
+        return space
+
+
 class CategoricalSearchSpace(GeneralDiscreteSearchSpace, HasOneHotEncoder):
     r"""
     A categorical :class:`SearchSpace` representing a finite set :math:`\mathcal{C}` of categories,
@@ -586,6 +600,7 @@ def __init__(
             tags = [tuple(ts) for ts in category_names]
 
         self._tags = tags
+        self._dtype = dtype
 
         ranges = [tf.range(len(ts), dtype=dtype) for ts in tags]
         meshgrid = tf.meshgrid(*ranges, indexing="ij")
@@ -633,7 +648,11 @@ def encoder(x: TensorType) -> TensorType:
                 for ts in self.tags
             ]
             encoded = tf.concat(
-                [encoder(column) for encoder, column in zip(encoders, columns)], axis=1
+                [
+                    tf.cast(encoder(column), dtype=self._dtype)
+                    for encoder, column in zip(encoders, columns)
+                ],
+                axis=1,
             )
             return unflatten(encoded)
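
To make the new encoding utilities concrete, here is a short usage sketch. It is illustrative
only: the category names and tags are invented, and it assumes (as the mixed-space integration
test above does) that a TaggedProductSearchSpace delegates one-hot encoding to its categorical
subspaces while passing continuous columns through unchanged:

from trieste.space import (
    Box,
    CategoricalSearchSpace,
    TaggedProductSearchSpace,
    one_hot_encoded_space,
    one_hot_encoder,
)

# one categorical feature with three categories, crossed with one continuous dimension
space = TaggedProductSearchSpace(
    spaces=[CategoricalSearchSpace(["red", "green", "blue"]), Box([0.0], [1.0])],
    tags=["colour", "amount"],
)

encoder = one_hot_encoder(space)
points = space.sample(2)   # shape [2, 2]: a category index plus a continuous value
encoded = encoder(points)  # shape [2, 4]: three one-hot columns plus the continuous value

# the corresponding encoded-domain space, e.g. for continuous acquisition optimizers
encoded_space = one_hot_encoded_space(space)

Models constructed with an ``encoder`` (for example ``DeepEnsemble(keras_ensemble,
encoder=encoder)``) apply it in the public ``predict``/``sample``/``update``/``optimize``
methods before delegating to the ``*_encoded`` variants introduced in this patch, so encoding
remains an implementation detail of the model. The ``tf.cast`` added to the categorical encoder
in the final hunk keeps every encoded column in the space's dtype, so the ``tf.concat`` does
not mix float widths.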