Skip to content

Commit

Permalink
Added partially correct UCB learner
Browse files Browse the repository at this point in the history
  • Loading branch information
ianran committed Dec 16, 2023
1 parent 04f89e7 commit 02cf18a
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 57 deletions.
9 changes: 5 additions & 4 deletions src/lop/active_learning/ActiveLearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re
selected_idx = not_selected[0]
not_selected.pop(0)
else:
selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected)
selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected, prev_selection | set(sel_pts))


if selected_idx in pref_not_sel or len(pref_not_sel) == 0:
Expand All @@ -117,7 +117,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re
while len(sel_pts) < num_alts:
# only select from the preferred points if they still exist
if len(pref_not_sel) > 0:
selected_idx = self.select_greedy(candidate_pts, mu, data, pref_not_sel)
selected_idx = self.select_greedy(candidate_pts, mu, data, pref_not_sel, prev_selection | set(sel_pts))
pref_not_sel.remove(selected_idx)
else:
# get the set of points not selected and not preferred, if not already defined
Expand All @@ -126,7 +126,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re
# ensure that there is at least some pts left to select from
if len(all_not_selected) == 0:
raise Exception("Not enough points for select to create a full set")
selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected)
selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected, prev_selection | set(sel_pts))
all_not_selected.remove(selected_idx)

# add the selected index
Expand Down Expand Up @@ -197,9 +197,10 @@ def select_best(self, mu, prefer_pts, prev_selection=set()):
# @param mu - a numpy array of mu values output from predict. numpy (n)
# @param data - a user defined tuple of data (determined by the predict function of the model)
# @param indicies - a list or set of indices in candidate points to consider.
# @param prev_selection - a set of indices of previously selected points
#
# @return the index of the greedy selection.
def select_greedy(self, candidate_pts, mu, data, indicies):
def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
raise NotImplementedError("ActiveLearner select_greedy is not implemented and has been called")

# select_greedy_k
Expand Down
6 changes: 4 additions & 2 deletions src/lop/active_learning/BestLearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ class BestLearner(ActiveLearner):
# @param mu - a numpy array of mu values output from predict. numpy (n)
# @param data - a user defined tuple of data (determined by the predict function of the model)
# @param indicies - a list or set of indices in candidate points to consider.
# @param prev_selection - a set of indices of previously selected points
#
# @return the index of the greedy selection.
def select_greedy(self, candidate_pts, mu, data, indicies):
def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
indicies = list(indicies)

select_mu = mu[indicies]
Expand All @@ -56,9 +57,10 @@ class WorstLearner(ActiveLearner):
# @param mu - a numpy array of mu values output from predict. numpy (n)
# @param data - a user defined tuple of data (determined by the predict function of the model)
# @param indicies - a list or set of indices in candidate points to consider.
# @param prev_selection - a set of indices of previously selected points
#
# @return the index of the greedy selection.
def select_greedy(self, candidate_pts, mu, data, indicies):
def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
indicies = list(indicies)

select_mu = mu[indicies]
Expand Down
73 changes: 22 additions & 51 deletions src/lop/active_learning/UCBLearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,62 +29,33 @@
class UCBLearner(ActiveLearner):
## Constructor
# @param alpha - the scalar value on the UCB equation UCB = mean + alpha*sqrt(variance)
def __init__(self, alpha=1):
super(UCBLearner, self).__init__()
# @param default_to_pareto - [opt default=False] sets whether to always assume
# preferring pareto optimal choices when selecting points, if not particularly told not to
# @param always_select_best - [opt default=False] sets whether the select function should append
# the top solution to the front of the solution set every time.
def __init__(self, alpha=1, default_to_pareto=False, always_select_best=False):
super(UCBLearner, self).__init__(default_to_pareto,always_select_best)
self.alpha = alpha

## select
# Selects the given points


## select_greedy
# This function greedily selects the best single data point
# Depending on the selection method, you are not forced to implement this function
# @param candidate_pts - a numpy array of points (nxk), n = number of points, k = number of dimensions
# @param num_alts - the number of alternatives to select (including the highest mean)
# @param prev_selection - [opt, default = []]a list of indicies that
# @param prefer_num - [default = None] the points at the start of the candidates
# to prefer selecting from. Returned as:
# a. A number of points at the start of candidate_pts to prefer
# b. A set of points to prefer to select.
# c. 'pareto' to indicate
# d. Enter 0 explicitly ignore selections
# e. None (default) assumes 0 unless default to pareto is true.
# @param return_not - [opt default=False] returns the not-selected points when there is
# a preference for selecting certain points. [] if there is no preference but this is set to true.
#
# @param mu - a numpy array of mu values output from predict. numpy (n)
# @param data - a user defined tuple of data (determined by the predict function of the model)
# @param indicies - a list or set of indices in candidate points to consider.
# @param prev_selection - a set of indices of previously selected points
#
# @return [highest_mean, highest_selection, next highest selection, ...],
# selection values for candidate_pts,
# only returns highest mean if "always select best is set"
def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, not_selected=False):
prefer_pts = self.get_prefered_set_of_pts(candidate_pts, prefer_pts)

# @return the index of the greedy selection.
def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
if isinstance(self.model, (PreferenceGP, GP)):
mu, variance = self.gp.predict(candidate_pts)
UCB = mu + self.alpha*np.sqrt(variance)
variance = data
elif isinstance(self.model, PreferenceLinear):
UCB = 1 # TODO
else:
raise Exception("UCBLearner does not know how to handle model of type: " + str(type(self.model)))

best_idx = np.argmax(mu)

selected_idx = self.select_best_k(UCB, num_alts, best_idx, prefer_num)

return selected_idx, UCB[selected_idx], mu[best_idx]



def select_greedy(self, cur_selection, data):
mu, variance, cov, prefer_num = data

best_v = -float('inf')
best_i = -1

exp_v = 1.0 / (len(cur_selection) + 1)
for i in [x for x in range(len(mu)) if x not in cur_selection]:
vari = variance[i]

value = (1-self.alpha)*mu[i] + self.alpha*np.sqrt(vari)
raise NotImplementedError("Have not implemented UCB with linear preferences")
indicies = list(indicies)

if value > best_v:
best_v = value
best_i = i
selected_UCB = mu[indicies] + self.alpha*np.sqrt(variance[indicies])

return best_i, best_v
return indicies[np.argmax(selected_UCB)]
12 changes: 12 additions & 0 deletions tests/active_learning/test_UCB_learner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# test_UCB_learner.py
# Written Ian Rankin - December 2023
#

import pytest

import numpy as np
import lop


# Test entry point for the UCB learner.
# As written it fails unconditionally: the body is a bare `assert False` and
# exercises no UCBLearner behavior.
# NOTE(review): presumably a placeholder to be replaced by a real test - confirm.
def test_UCB_learner():
    assert False

0 comments on commit 02cf18a

Please sign in to comment.