From 02cf18a6704e4d92dd3bdaf11b5778ffc1c5e7e1 Mon Sep 17 00:00:00 2001 From: ianran Date: Fri, 15 Dec 2023 17:09:54 -0800 Subject: [PATCH] Added partially correct UCB learner --- src/lop/active_learning/ActiveLearner.py | 9 +-- src/lop/active_learning/BestLearner.py | 6 +- src/lop/active_learning/UCBLearner.py | 73 +++++++---------------- tests/active_learning/test_UCB_learner.py | 12 ++++ 4 files changed, 43 insertions(+), 57 deletions(-) create mode 100644 tests/active_learning/test_UCB_learner.py diff --git a/src/lop/active_learning/ActiveLearner.py b/src/lop/active_learning/ActiveLearner.py index 53c40dc..9e261a7 100644 --- a/src/lop/active_learning/ActiveLearner.py +++ b/src/lop/active_learning/ActiveLearner.py @@ -97,7 +97,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re selected_idx = not_selected[0] not_selected.pop(0) else: - selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected) + selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected, prev_selection | set(sel_pts)) if selected_idx in pref_not_sel or len(pref_not_sel) == 0: @@ -117,7 +117,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re while len(sel_pts) < num_alts: # only select from the prefered points if they still exist if len(pref_not_sel) > 0: - selected_idx = self.select_greedy(candidate_pts, mu, data, pref_not_sel) + selected_idx = self.select_greedy(candidate_pts, mu, data, pref_not_sel, prev_selection | set(sel_pts)) pref_not_sel.remove(selected_idx) else: # get if the set of points not selected and not prefered if not already defined @@ -126,7 +126,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re # ensure that there is at least some pts left to select from if len(all_not_selected) == 0: raise Exception("Not enough points for select to create a full set") - selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected) + selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected, prev_selection | set(sel_pts)) all_not_selected.remove(selected_idx) # add the selected index @@ -197,9 +197,10 @@ def select_best(self, mu, prefer_pts, prev_selection=set()): # @param mu - a numpy array of mu values outputed from predict. numpy (n) # @param data - a user defined tuple of data (determined by the predict function of the model) # @param indicies - a list or set of indicies in candidate points to consider. + # @param prev_selection - a set ofindicies of previously selected points # # @return the index of the greedy selection. - def select_greedy(self, candidate_pts, mu, data, indicies): + def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection): raise NotImplementedError("ActiveLearner select_greedy is not implemented and has been called") # select_greedy_k diff --git a/src/lop/active_learning/BestLearner.py b/src/lop/active_learning/BestLearner.py index 3099761..f620741 100644 --- a/src/lop/active_learning/BestLearner.py +++ b/src/lop/active_learning/BestLearner.py @@ -37,9 +37,10 @@ class BestLearner(ActiveLearner): # @param mu - a numpy array of mu values outputed from predict. numpy (n) # @param data - a user defined tuple of data (determined by the predict function of the model) # @param indicies - a list or set of indicies in candidate points to consider. + # @param prev_selection - a set ofindicies of previously selected points # # @return the index of the greedy selection. - def select_greedy(self, candidate_pts, mu, data, indicies): + def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection): indicies = list(indicies) select_mu = mu[indicies] @@ -56,9 +57,10 @@ class WorstLearner(ActiveLearner): # @param mu - a numpy array of mu values outputed from predict. numpy (n) # @param data - a user defined tuple of data (determined by the predict function of the model) # @param indicies - a list or set of indicies in candidate points to consider. + # @param prev_selection - a set ofindicies of previously selected points # # @return the index of the greedy selection. - def select_greedy(self, candidate_pts, mu, data, indicies): + def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection): indicies = list(indicies) select_mu = mu[indicies] diff --git a/src/lop/active_learning/UCBLearner.py b/src/lop/active_learning/UCBLearner.py index a2107b9..1768a33 100644 --- a/src/lop/active_learning/UCBLearner.py +++ b/src/lop/active_learning/UCBLearner.py @@ -29,62 +29,33 @@ class UCBLearner(ActiveLearner): ## Constructor # @param alpha - the scaler value on the UCB equation UCB = mean + alpha*sqrt(variance) - def __init__(self, alpha=1): - super(UCBLearner, self).__init__() + # @param default_to_pareto - [opt default=False] sets whether to always assume + # prefering pareto optimal choices when selecting points, if not particulary told not to + # @param alaways_select_best - [opt default=False] sets whether the select function should append the + # the top solution to the front of the solution set every time. + def __init__(self, alpha=1, default_to_pareto=False, always_select_best=False): + super(UCBLearner, self).__init__(default_to_pareto,always_select_best) self.alpha = alpha - ## select - # Selects the given points + + + ## select_greedy + # This function greedily selects the best single data point + # Depending on the selection method, you are not forced to implement this function # @param candidate_pts - a numpy array of points (nxk), n = number points, k = number of dimmensions - # @param num_alts - the number of alterantives to selec (including the highest mean) - # @param prev_selection - [opt, default = []]a list of indicies that - # @param prefer_num - [default = None] the points at the start of the candidates - # to prefer selecting from. Returned as: - # a. A number of points at the start of canididate_pts to prefer - # b. A set of points to prefer to select. - # c. 'pareto' to indicate - # d. Enter 0 explicitly ignore selections - # e. None (default) assumes 0 unless default to pareto is true. - # @param return_not - [opt default-false] returns the not selected points when there - # a preference to selecting to certian points. [] if not but set to true. - # + # @param mu - a numpy array of mu values outputed from predict. numpy (n) + # @param data - a user defined tuple of data (determined by the predict function of the model) + # @param indicies - a list or set of indicies in candidate points to consider. + # @param prev_selection - a set ofindicies of previously selected points # - # @return [highest_mean, highest_selection, next highest selection, ...], - # selection values for candidate_pts, - # only returns highest mean if "always select best is set" - def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, not_selected=False): - prefer_pts = self.get_prefered_set_of_pts(candidate_pts, prefer_pts) - + # @return the index of the greedy selection. + def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection): if isinstance(self.model, (PreferenceGP, GP)): - mu, variance = self.gp.predict(candidate_pts) - UCB = mu + self.alpha*np.sqrt(variance) + variance = data elif isinstance(self.model, PreferenceLinear): - UCB = 1 # TODO - else: - raise Exception("UCBLearner does not know how to handle model of type: " + str(type(self.model))) - - best_idx = np.argmax(mu) - - selected_idx = self.select_best_k(UCB, num_alts, best_idx, prefer_num) - - return selected_idx, UCB[selected_idx], mu[best_idx] - - - - def select_greedy(self, cur_selection, data): - mu, variance, cov, prefer_num = data - - best_v = -float('inf') - best_i = -1 - - exp_v = 1.0 / (len(cur_selection) + 1) - for i in [x for x in range(len(mu)) if x not in cur_selection]: - vari = variance[i] - - value = (1-self.alpha)*mu[i] + self.alpha*np.sqrt(vari) + raise NotImplementedError("Have not implemented UCB with linear preferences") + indicies = list(indicies) - if value > best_v: - best_v = value - best_i = i + selected_UCB = mu[indicies] + self.alpha*np.sqrt(variance[indicies]) - return best_i, best_v + return indicies[np.argmax(selected_UCB)] diff --git a/tests/active_learning/test_UCB_learner.py b/tests/active_learning/test_UCB_learner.py new file mode 100644 index 0000000..5723fb0 --- /dev/null +++ b/tests/active_learning/test_UCB_learner.py @@ -0,0 +1,12 @@ +# test_UCB_learner.py +# Written Ian Rankin - December 2023 +# + +import pytest + +import numpy as np +import lop + + +def test_UCB_learner(): + assert False