diff --git a/src/lop/active_learning/MutualInfoLearner.py b/src/lop/active_learning/MutualInfoLearner.py
new file mode 100644
index 0000000..e026285
--- /dev/null
+++ b/src/lop/active_learning/MutualInfoLearner.py
@@ -0,0 +1,105 @@
+# Copyright 2023 Ian Rankin
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
+# to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
+# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# MutualInfoLearner.py
+# Written Ian Rankin - December 2023
+#
+# Mutual information active learning algorithm
+
+import numpy as np
+
+from lop.active_learning import ActiveLearner
+from lop.models import PreferenceGP, GP, PreferenceLinear
+
+from lop.utilities.human_choice_model import p_human_choice
+
+class MutualInfoLearner(ActiveLearner):
+    ## Constructor
+    # @param fake_func - [opt default=None] a function used to rescale the sampled reward values
+    #           so their mean matches the mean of this function over the candidate points
+    #           (see select_greedy)
+    # @param default_to_pareto - [opt default=False] sets whether to always assume
+    #           preferring Pareto optimal choices when selecting points, if not particularly told not to
+    # @param always_select_best - [opt default=False] sets whether the select function should append
+    #           the top solution to the front of the solution set every time.
+    def __init__(self, fake_func=None, default_to_pareto=False, always_select_best=False):
+        super(MutualInfoLearner, self).__init__(default_to_pareto, always_select_best)
+        self.M = 75 # number of reward samples to draw (arbitrary value at the moment)
+        self.peakiness = 10
+        self.fake_func = fake_func
+
+
+
+    ## select_greedy
+    # This function greedily selects the best single data point.
+    # Depending on the selection method, implementing this function may not be required.
+    # @param candidate_pts - a numpy array of points (nxk), n = number of points, k = number of dimensions
+    # @param mu - a numpy array of mu values output from predict. numpy (n)
+    # @param data - a user defined tuple of data (determined by the predict function of the model)
+    # @param indicies - a list or set of indices in candidate_pts to consider.
+    # @param prev_selection - a set of indices of previously selected points
+    #
+    # @return the index of the greedy selection.
+    def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
+        if isinstance(self.model, (PreferenceGP, GP)):
+            variance = data
+            cov = self.model.cov
+
+            indicies = list(indicies)
+            prev_selection = list(prev_selection)
+
+            # sample M possible parameters w (reward values of the GP)
+            all_w = np.random.multivariate_normal(mu, cov, size=self.M)
+
+            if self.fake_func is not None:
+                # rescale the sampled rewards so their mean matches the fake function's mean
+                fake_f_mean = np.mean(self.fake_func(candidate_pts))
+                samp_mean = np.mean(all_w)
+
+                print('Scaling using fake function: ' + str(fake_f_mean / samp_mean))
+                all_w = all_w * (fake_f_mean / samp_mean)
+
+            # evaluate the information gain of appending each candidate index to the query
+            info_gain = [self.calc_info_gain(prev_selection + [idx], all_w) for idx in indicies]
+
+            return indicies[np.argmax(info_gain)]
+        elif isinstance(self.model, PreferenceLinear):
+            raise NotImplementedError("MutualInfoLearner has not been implemented for linear preference models")
+
+
+    ## calc_info_gain
+    # Calculate the information gain for a query Q given the sampled parameters / rewards w.
+    # Only the human choice model p(q|w,Q) is needed, where w is the rewards and Q is the
+    # particular query; p(w) does not appear explicitly because w is already sampled
+    # from its distribution.
+    #
+    # @param Q - list of indices in the query.
+    # @param all_w - a matrix of possible rewards for the sampled set of parameters [M,N]
+    #           M - number of samples
+    #           N - number of candidate points.
+    #
+    # @return the estimated information gain of the query.
+    def calc_info_gain(self, Q, all_w):
+        # Find the probabilities of the human selecting each point in the query
+        # given the possible reward values
+        p = p_human_choice(all_w[:,Q], self.peakiness)
+        # find the sum of the probabilities over the w samples
+        sum_p_over_w = np.sum(p, axis=0)
+
+        # Find the information gain using the sampled form of equation (4) in [1]
+        info_gain = np.sum(p * np.log2(self.M * p / sum_p_over_w)) / self.M
+
+        return info_gain
+
diff --git a/src/lop/active_learning/__init__.py b/src/lop/active_learning/__init__.py
index 1905cc1..0a5c24b 100644
--- a/src/lop/active_learning/__init__.py
+++ b/src/lop/active_learning/__init__.py
@@ -5,3 +5,4 @@
 from .UCBLearner import UCBLearner
 from .RandomLearner import RandomLearner
 from .GV_UCBLearner import GV_UCBLearner
+from .MutualInfoLearner import MutualInfoLearner
diff --git a/tests/active_learning/test_GV_UCB_learner.py b/tests/active_learning/test_GV_UCB_learner.py
index b148488..62c384a 100644
--- a/tests/active_learning/test_GV_UCB_learner.py
+++ b/tests/active_learning/test_GV_UCB_learner.py
@@ -21,7 +21,7 @@ def test_GV_UCB_learner_constructs():
     assert isinstance(al, lop.GV_UCBLearner)
     assert isinstance(model, lop.Model)
 
-def test_UCB_learner_trains_basic_GP():
+def test_GV_UCB_learner_trains_basic_GP():
     al = lop.GV_UCBLearner()
     model = lop.GP(lop.RBF_kern(0.5,1.0), active_learner=al)
 
diff --git a/tests/active_learning/test_mutual_info_learner.py b/tests/active_learning/test_mutual_info_learner.py
new file mode 100644
index 0000000..9e28387
--- /dev/null
+++ b/tests/active_learning/test_mutual_info_learner.py
@@ -0,0 +1,48 @@
+# test_mutual_info_learner.py
+# Written Ian Rankin - December 2023
+#
+
+import pytest
+
+import numpy as np
+import lop
+
+
+
+# the function to approximate
+def f_sin(x, data=None):
+    return 2 * np.cos(np.pi * (x-2)) * np.exp(-(0.9*x))
+
+
+def test_mutual_info_learner_constructs():
+    al = lop.MutualInfoLearner()
+    model = lop.Model(active_learner=al)
+
+    assert isinstance(al, lop.MutualInfoLearner)
+    assert isinstance(model, lop.Model)
+
+def test_mutual_info_learner_trains_basic_GP():
+    al = lop.MutualInfoLearner()
+    model = lop.GP(lop.RBF_kern(0.5,1.0), active_learner=al)
+
+
+    np.random.seed(5) # fixed seed so a bad dice roll doesn't break the test
+    for i in range(10):
+        # generate a random candidate set to select test points from
+        x_candidates = np.random.random(20)*10
+
+        test_pt_idxs = model.select(x_candidates, 2)
+
+
+        x_train = x_candidates[test_pt_idxs]
+        y_train = f_sin(x_train)
+
+        model.add(x_train, y_train)
+
+
+    x_test = np.array([0,1,2,3,4.5,7,9])
+    y_test = f_sin(x_test)
+    y_pred = model(x_test)
+
+    assert (np.abs(y_pred - y_test) < 0.2).all()
+
diff --git a/tests/active_learning/test_random_learner.py b/tests/active_learning/test_random_learner.py
index 6bc589d..89d4915 100644
--- a/tests/active_learning/test_random_learner.py
+++ b/tests/active_learning/test_random_learner.py
@@ -14,14 +14,14 @@ def f_sin(x, data=None):
     return 2 * np.cos(np.pi * (x-2)) * np.exp(-(0.9*x))
 
 
-def test_UCB_learner_constructs():
+def test_random_learner_constructs():
     al = lop.RandomLearner()
     model = lop.Model(active_learner=al)
 
     assert isinstance(al, lop.RandomLearner)
     assert isinstance(model, lop.Model)
 
-def test_UCB_learner_trains_basic_GP():
+def test_random_learner_trains_basic_GP():
     al = lop.RandomLearner()
     model = lop.GP(lop.RBF_kern(0.5,1.0), active_learner=al)
 
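For reference, calc_info_gain is a Monte Carlo estimate of the query information gain: info_gain = (1/M) * sum over m,q of p(q|w_m) * log2( M * p(q|w_m) / sum over m' of p(q|w_m') ). The sketch below reproduces the computation outside the class so it can be checked in isolation. It is a minimal sketch, assuming a softmax choice model: p_human_choice_sketch and info_gain_sketch are hypothetical names, and the softmax stands in for lop.utilities.human_choice_model.p_human_choice rather than reproducing the library's actual implementation.

    import numpy as np

    def p_human_choice_sketch(rewards, peakiness):
        # Softmax across the query items for each sampled reward vector.
        # Assumed stand-in for lop's p_human_choice.
        e = np.exp(peakiness * (rewards - rewards.max(axis=1, keepdims=True)))
        return e / e.sum(axis=1, keepdims=True)

    def info_gain_sketch(Q, all_w, peakiness=10):
        # Monte Carlo estimate over M sampled reward vectors:
        # (1/M) * sum_{m,q} p(q|w_m) * log2(M * p(q|w_m) / sum_{m'} p(q|w_m'))
        M = all_w.shape[0]
        p = p_human_choice_sketch(all_w[:, Q], peakiness)  # p[m, q]
        sum_p_over_w = np.sum(p, axis=0)
        return np.sum(p * np.log2(M * p / sum_p_over_w)) / M

    # Toy check: 3 candidate points, 50 sampled reward vectors.
    rng = np.random.default_rng(0)
    all_w = rng.normal(size=(50, 3))
    print(info_gain_sketch([0, 2], all_w))

One property worth noting: if every sampled w produces the same choice distribution, then p(q|w_m) equals the sample average sum_p_over_w / M, the log term vanishes, and the estimated gain is zero; the learner therefore favors queries the sampled rewards disagree on.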