Added Mutual information gain
ianran committed Dec 20, 2023
1 parent 3142216 commit d7422bc
Showing 5 changed files with 157 additions and 3 deletions.
105 changes: 105 additions & 0 deletions src/lop/active_learning/MutualInfoLearner.py
@@ -0,0 +1,105 @@
# Copyright 2023 Ian Rankin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
# to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# MutualInfoLearner.py
# Written Ian Rankin - December 2023
#
# Mutual information gain learning algorithm.
# Greedily selects the query that maximizes the mutual information between the
# human's choice and the sampled reward values.

import numpy as np

from lop.active_learning import ActiveLearner
from lop.models import PreferenceGP, GP, PreferenceLinear

from lop.utilities.human_choice_model import p_human_choice

class MutualInfoLearner(ActiveLearner):
    ## Constructor
    # @param fake_func - [opt, default=None] a known function used to rescale the sampled
    #           reward values of a GP to a comparable mean.
    # @param default_to_pareto - [opt, default=False] sets whether to always prefer
    #           pareto optimal choices when selecting points, if not explicitly told otherwise.
    # @param always_select_best - [opt, default=False] sets whether the select function should
    #           append the top solution to the front of the solution set every time.
    def __init__(self, fake_func=None, default_to_pareto=False, always_select_best=False):
        super(MutualInfoLearner, self).__init__(default_to_pareto, always_select_best)
        self.M = 75             # number of reward samples (a somewhat arbitrary value at the moment)
        self.peakiness = 10
        self.fake_func = fake_func



    ## select_greedy
    # This function greedily selects the best single data point.
    # Depending on the selection method, you are not forced to implement this function.
    # @param candidate_pts - a numpy array of points (n x k), n = number of points, k = number of dimensions
    # @param mu - a numpy array of mean values output from predict, numpy (n)
    # @param data - a user defined tuple of data (determined by the predict function of the model)
    # @param indicies - a list or set of indices in candidate_pts to consider.
    # @param prev_selection - a set of indices of previously selected points
    #
    # @return the index of the greedy selection.
    def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
        if isinstance(self.model, (PreferenceGP, GP)):
            variance = data
            cov = self.model.cov

            indicies = list(indicies)
            prev_selection = list(prev_selection)

            # sample M possible parameter vectors w (reward values of the GP)
            all_w = np.random.multivariate_normal(mu, cov, size=self.M)

            if self.fake_func is not None:
                fake_f_mean = np.mean(self.fake_func(candidate_pts))
                samp_mean = np.mean(all_w)

                print('Scaling using fake function: ' + str(fake_f_mean / samp_mean))
                all_w = all_w * (fake_f_mean / samp_mean)

            # information gain of appending each candidate index to the previous selection
            info_gain = [self.calc_info_gain(prev_selection + [idx], all_w) for idx in indicies]

            return indicies[np.argmax(info_gain)]

        elif isinstance(self.model, PreferenceLinear):
            raise NotImplementedError("Have not implemented mutual information with linear preferences")


    ## calc_info_gain
    # Calculates the information gain for a query Q given the sampled rewards W.
    # Only the human choice model p(q|w,Q) is needed, where w are the sampled rewards and
    # Q is the particular query; p(w) is handled implicitly because w is sampled from the
    # model's distribution.
    #
    # @param Q - a list of indices of the query.
    # @param all_w - a matrix of possible rewards for the sampled set of parameters [M,N]
    #           M - number of samples
    #           N - number of candidate points.
    #
    # @return the estimated information gain of the query.
    def calc_info_gain(self, Q, all_w):
        # Find the probability of the human selecting each point in the query
        # given each sample of possible reward values, shape [M, |Q|]
        p = p_human_choice(all_w[:,Q], self.peakiness)
        # sum the choice probabilities over the sampled w, shape [|Q|]
        sum_p_over_w = np.sum(p, axis=0)

        # Find the information gain using the sample-based form of equation (4) in [1]
        info_gain = np.sum(p * np.log2(self.M * p / sum_p_over_w)) / self.M

        return info_gain
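
For reference, below is a minimal, self-contained sketch of the Monte-Carlo mutual-information estimate computed by calc_info_gain above. It assumes p_human_choice is a Boltzmann (softmax) choice model with peakiness acting as the rationality constant; that assumption, and every name in the sketch, is illustrative rather than the lop library's actual API.

# Illustrative sketch only: assumes the human choice model is a softmax over
# peakiness-scaled rewards; this mirrors calc_info_gain but is not lop's API.
import numpy as np

def softmax_choice(rewards, peakiness=10.0):
    # rewards: [M, |Q|] sampled rewards for each point in the query Q.
    # Returns p(q | w_m, Q) for every sample m and every point q in the query.
    z = peakiness * rewards
    z = z - z.max(axis=1, keepdims=True)    # subtract the row max for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def info_gain_estimate(all_w_Q, peakiness=10.0):
    # Monte-Carlo estimate of the mutual information between the human's answer and w:
    #   (1/M) * sum_m sum_q p(q|w_m) * log2( M * p(q|w_m) / sum_m' p(q|w_m') )
    M = all_w_Q.shape[0]
    p = softmax_choice(all_w_Q, peakiness)      # [M, |Q|]
    sum_p_over_w = p.sum(axis=0)                # [|Q|]
    return np.sum(p * np.log2(M * p / sum_p_over_w)) / M

# Tiny example: 3 reward samples over a 2-point query.
all_w_Q = np.array([[0.2, 0.9],
                    [0.8, 0.1],
                    [0.5, 0.4]])
print(info_gain_estimate(all_w_Q))    # larger values indicate a more informative query

When the sampled rewards disagree about which point in the query is best, the per-sample choice distributions differ and the estimate grows, which is what drives select_greedy toward informative queries.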


1 change: 1 addition & 0 deletions src/lop/active_learning/__init__.py
@@ -5,3 +5,4 @@
from .UCBLearner import UCBLearner
from .RandomLearner import RandomLearner
from .GV_UCBLearner import GV_UCBLearner
from .MutualInfoLearner import MutualInfoLearner
2 changes: 1 addition & 1 deletion tests/active_learning/test_GV_UCB_learner.py
@@ -21,7 +21,7 @@ def test_GV_UCB_learner_constructs():
    assert isinstance(al, lop.GV_UCBLearner)
    assert isinstance(model, lop.Model)

-def test_UCB_learner_trains_basic_GP():
+def test_GV_UCB_learner_trains_basic_GP():
    al = lop.GV_UCBLearner()
    model = lop.GP(lop.RBF_kern(0.5,1.0), active_learner=al)

48 changes: 48 additions & 0 deletions tests/active_learning/test_mutual_info_learner.py
@@ -0,0 +1,48 @@
# test_mutual_info_learner.py
# Written Ian Rankin - December 2023
#

import pytest

import numpy as np
import lop



# the function to approximate
def f_sin(x, data=None):
    return 2 * np.cos(np.pi * (x-2)) * np.exp(-(0.9*x))


def test_mutual_info_learner_constructs():
    al = lop.MutualInfoLearner()
    model = lop.Model(active_learner=al)

    assert isinstance(al, lop.MutualInfoLearner)
    assert isinstance(model, lop.Model)

def test_mutual_info_learner_trains_basic_GP():
    al = lop.MutualInfoLearner()
    model = lop.GP(lop.RBF_kern(0.5,1.0), active_learner=al)


    np.random.seed(5) # just to ensure it doesn't break the test on a bad dice roll
    for i in range(10):
        # generate random test set to select test points from
        x_canidiates = np.random.random(20)*10

        test_pt_idxs = model.select(x_canidiates, 2)


        x_train = x_canidiates[test_pt_idxs]
        y_train = f_sin(x_train)

        model.add(x_train, y_train)


    x_test = np.array([0,1,2,3,4.5,7,9])
    y_test = f_sin(x_test)
    y_pred = model(x_test)

    assert (np.abs(y_pred - y_test) < 0.2).all()
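
If the reward scale mattered, the constructor's fake_func hook could be exercised in the same setup. A hypothetical variation of the test above (not an existing test in this commit) might look like:

al = lop.MutualInfoLearner(fake_func=f_sin)   # rescale sampled rewards toward f_sin's mean
model = lop.GP(lop.RBF_kern(0.5,1.0), active_learner=al)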

4 changes: 2 additions & 2 deletions tests/active_learning/test_random_learner.py
@@ -14,14 +14,14 @@ def f_sin(x, data=None):
    return 2 * np.cos(np.pi * (x-2)) * np.exp(-(0.9*x))


-def test_UCB_learner_constructs():
+def test_random_learner_constructs():
    al = lop.RandomLearner()
    model = lop.Model(active_learner=al)

    assert isinstance(al, lop.RandomLearner)
    assert isinstance(model, lop.Model)

-def test_UCB_learner_trains_basic_GP():
+def test_random_learner_trains_basic_GP():
    al = lop.RandomLearner()
    model = lop.GP(lop.RBF_kern(0.5,1.0), active_learner=al)

