Commit
Showing 5 changed files with 157 additions and 3 deletions.
@@ -0,0 +1,105 @@
# Copyright 2023 Ian Rankin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
# to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# MutualInfoLearner.py
# Written by Ian Rankin - December 2023
#
# Mutual information based active learning algorithm.
# Greedily selects the query expected to give the most information
# about the user's reward function.

import numpy as np

from lop.active_learning import ActiveLearner
from lop.models import PreferenceGP, GP, PreferenceLinear
from lop.utilities.human_choice_model import p_human_choice

class MutualInfoLearner(ActiveLearner):
    ## Constructor
    # @param fake_func - [opt default=None] a fake function used to rescale the sampled
    #            reward values for GPs, so that the samples' mean matches the fake
    #            function's mean over the candidate points
    # @param default_to_pareto - [opt default=False] sets whether to always prefer
    #            pareto optimal choices when selecting points, if not explicitly told otherwise
    # @param always_select_best - [opt default=False] sets whether the select function should
    #            append the top solution to the front of the solution set every time.
    def __init__(self, fake_func=None, default_to_pareto=False, always_select_best=False):
        super(MutualInfoLearner, self).__init__(default_to_pareto, always_select_best)
        self.M = 75 # number of sampled reward functions (arbitrary value at the moment)
        self.peakiness = 10 # peakiness of the human choice model

    ## select_greedy
    # This function greedily selects the best single data point.
    # Depending on the selection method, implementing this function is not always required.
    # @param candidate_pts - a numpy array of points (nxk), n = number of points, k = number of dimensions
    # @param mu - a numpy array of mu values output by predict. numpy (n)
    # @param data - a user defined tuple of data (determined by the predict function of the model)
    # @param indicies - a list or set of indices in candidate points to consider.
    # @param prev_selection - a set of indices of previously selected points
    #
    # @return the index of the greedy selection.
    def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
        if isinstance(self.model, (PreferenceGP, GP)):
            variance = data
            cov = self.model.cov

            indicies = list(indicies)
            prev_selection = list(prev_selection)

            # sample M possible parameters w (reward values of the GP)
            all_w = np.random.multivariate_normal(mu, cov, size=self.M)

            if self.fake_func is not None:
                fake_f_mean = np.mean(self.fake_func(candidate_pts))
                samp_mean = np.mean(all_w)

                print('Scaling using fake function: ' + str(fake_f_mean / samp_mean))
                all_w = all_w * (fake_f_mean / samp_mean)

            # evaluate the expected information gain of appending each candidate
            # index to the previous selection, and greedily take the best one
            info_gain = [self.calc_info_gain(prev_selection + [idx], all_w) for idx in indicies]

            return indicies[np.argmax(info_gain)]

        elif isinstance(self.model, PreferenceLinear):
            raise NotImplementedError("Have not implemented mutual information learner with linear preferences")
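
    # A note on the math (reconstructed from the computation in calc_info_gain
    # below, not taken from the original paper): with M sampled reward vectors
    # w_1..w_M, the information gain of a query Q is the Monte-Carlo estimate
    #   I(Q) ~= (1/M) * sum_m sum_{q in Q} p(q|w_m,Q) * log2( M * p(q|w_m,Q) / sum_m' p(q|w_m',Q) )
    # so queries whose outcome is confident under each individual sample, but
    # disagrees across samples, score the highest.
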
    # calculate the info gain for a query Q given the sampled parameters / rewards w
    # only needs p(q|w,Q), the human choice model, where w = rewards and Q is the particular query.
    # shouldn't this need p(w) as well? No, because w is sampled from its distribution.
    # Can I solve that exactly with the GP?
    #
    # @param Q - list of indices of the query.
    # @param all_w - a matrix of possible rewards for the sample set of parameters [M,N]
    #           M - number of samples
    #           N - number of candidate points.
    #
    def calc_info_gain(self, Q, all_w):
        # find the probabilities of the human selecting each point in the query,
        # given each possible set of reward values
        p = p_human_choice(all_w[:,Q], self.peakiness)
        # find the sum of the probabilities over the samples of w
        sum_p_over_w = np.sum(p, axis=0)

        # find the information gain using the sampled form of equation (4) in [1]
        info_gain = np.sum(p * np.log2(self.M * p / sum_p_over_w)) / self.M

        return info_gain
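Since p_human_choice is only imported above, here is a minimal, self-contained sketch of the info-gain computation on toy data. The softmax_choice helper is a hypothetical stand-in for lop's human choice model (assuming it is a peakiness-scaled softmax over the queried rewards); only the last three lines mirror calc_info_gain exactly.

import numpy as np

# hypothetical stand-in for lop.utilities.human_choice_model.p_human_choice
def softmax_choice(rewards, peakiness):
    # rewards: [M, |Q|] sampled rewards restricted to the queried points
    e = np.exp(peakiness * (rewards - rewards.max(axis=1, keepdims=True)))
    return e / e.sum(axis=1, keepdims=True)

M, peakiness = 75, 10
rng = np.random.default_rng(0)
all_w = rng.normal(size=(M, 5))    # M sampled reward vectors over 5 candidate points
Q = [0, 3]                         # query: compare candidates 0 and 3

p = softmax_choice(all_w[:, Q], peakiness)    # [M, |Q|] choice probabilities per sample
sum_p_over_w = np.sum(p, axis=0)              # unnormalized marginal over the w samples
info_gain = np.sum(p * np.log2(M * p / sum_p_over_w)) / M
print(info_gain)    # larger values indicate a more informative query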
@@ -0,0 +1,48 @@
# test_UCB_learner.py
# Written by Ian Rankin - December 2023
#

import pytest

import numpy as np
import lop


# the function to approximate
def f_sin(x, data=None):
    return 2 * np.cos(np.pi * (x-2)) * np.exp(-(0.9*x))


def test_mutual_info_learner_constructs():
    al = lop.MutualInfoLearner()
    model = lop.Model(active_learner=al)

    assert isinstance(al, lop.MutualInfoLearner)
    assert isinstance(model, lop.Model)


def test_mutual_info_learner_trains_basic_GP():
    al = lop.MutualInfoLearner()
    model = lop.GP(lop.RBF_kern(0.5, 1.0), active_learner=al)

    np.random.seed(5) # just to ensure it doesn't break the test on a bad dice roll
    for i in range(10):
        # generate a random candidate set to select training points from
        x_candidates = np.random.random(20)*10

        test_pt_idxs = model.select(x_candidates, 2)

        x_train = x_candidates[test_pt_idxs]
        y_train = f_sin(x_train)

        model.add(x_train, y_train)

    x_test = np.array([0,1,2,3,4.5,7,9])
    y_test = f_sin(x_test)
    y_pred = model(x_test)

    assert (np.abs(y_pred - y_test) < 0.2).all()