Added partially correct UCB learner

Robotic-Decision-Making-Lab · Dec 16, 2023 · 02cf18a · 02cf18a
1 parent 04f89e7
commit 02cf18a
Show file tree

Hide file tree

Showing 4 changed files with 43 additions and 57 deletions.
diff --git a/src/lop/active_learning/ActiveLearner.py b/src/lop/active_learning/ActiveLearner.py
@@ -97,7 +97,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re
                     selected_idx = not_selected[0]
                     not_selected.pop(0) 
                 else:
-                    selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected)
+                    selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected, prev_selection | set(sel_pts))
 
 
                 if selected_idx in pref_not_sel or len(pref_not_sel) == 0:
@@ -117,7 +117,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re
             while len(sel_pts) < num_alts:
                 # only select from the prefered points if they still exist
                 if len(pref_not_sel) > 0:
-                    selected_idx = self.select_greedy(candidate_pts, mu, data, pref_not_sel)
+                    selected_idx = self.select_greedy(candidate_pts, mu, data, pref_not_sel, prev_selection | set(sel_pts))
                     pref_not_sel.remove(selected_idx)
                 else:
                     # get if the set of points not selected and not prefered if not already defined
@@ -126,7 +126,7 @@ def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, re
                     # ensure that there is at least some pts left to select from
                     if len(all_not_selected) == 0:
                         raise Exception("Not enough points for select to create a full set")
-                    selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected)
+                    selected_idx = self.select_greedy(candidate_pts, mu, data, all_not_selected, prev_selection | set(sel_pts))
                     all_not_selected.remove(selected_idx)
 
                 # add the selected index
@@ -197,9 +197,10 @@ def select_best(self, mu, prefer_pts, prev_selection=set()):
     # @param mu - a numpy array of mu values outputed from predict. numpy (n)
     # @param data - a user defined tuple of data (determined by the predict function of the model)
     # @param indicies - a list or set of indicies in candidate points to consider.
+    # @param prev_selection - a set of indices of previously selected points
     #
     # @return the index of the greedy selection.
-    def select_greedy(self, candidate_pts, mu, data, indicies):
+    def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
         raise NotImplementedError("ActiveLearner select_greedy is not implemented and has been called")
 
     # select_greedy_k

diff --git a/src/lop/active_learning/BestLearner.py b/src/lop/active_learning/BestLearner.py
@@ -37,9 +37,10 @@ class BestLearner(ActiveLearner):
     # @param mu - a numpy array of mu values outputed from predict. numpy (n)
     # @param data - a user defined tuple of data (determined by the predict function of the model)
     # @param indicies - a list or set of indicies in candidate points to consider.
+    # @param prev_selection - a set of indices of previously selected points
     #
     # @return the index of the greedy selection.
-    def select_greedy(self, candidate_pts, mu, data, indicies):
+    def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
         indicies = list(indicies)
 
         select_mu = mu[indicies]
@@ -56,9 +57,10 @@ class WorstLearner(ActiveLearner):
     # @param mu - a numpy array of mu values outputed from predict. numpy (n)
     # @param data - a user defined tuple of data (determined by the predict function of the model)
     # @param indicies - a list or set of indicies in candidate points to consider.
+    # @param prev_selection - a set of indices of previously selected points
     #
     # @return the index of the greedy selection.
-    def select_greedy(self, candidate_pts, mu, data, indicies):
+    def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
         indicies = list(indicies)
 
         select_mu = mu[indicies]

diff --git a/src/lop/active_learning/UCBLearner.py b/src/lop/active_learning/UCBLearner.py
@@ -29,62 +29,33 @@
 class UCBLearner(ActiveLearner):
     ## Constructor
     # @param alpha - the scaler value on the UCB equation UCB = mean + alpha*sqrt(variance)
-    def __init__(self, alpha=1):
-        super(UCBLearner, self).__init__()
+    # @param default_to_pareto - [opt default=False] sets whether to always assume
+    #               preferring pareto optimal choices when selecting points, unless explicitly told not to
+    # @param always_select_best - [opt default=False] sets whether the select function should append
+    #               the top solution to the front of the solution set every time.
+    def __init__(self, alpha=1, default_to_pareto=False, always_select_best=False):
+        super(UCBLearner, self).__init__(default_to_pareto,always_select_best)
         self.alpha = alpha
 
-    ## select
-    # Selects the given points
+
+
+    ## select_greedy
+    # This function greedily selects the best single data point
+    # Depending on the selection method, you are not forced to implement this function
     # @param candidate_pts - a numpy array of points (nxk), n = number points, k = number of dimmensions
-    # @param num_alts - the number of alterantives to selec (including the highest mean)
-    # @param prev_selection - [opt, default = []]a list of indicies that 
-    # @param prefer_num - [default = None] the points at the start of the candidates
-    #                   to prefer selecting from. Returned as:
-    #                   a. A number of points at the start of canididate_pts to prefer
-    #                   b. A set of points to prefer to select.
-    #                   c. 'pareto' to indicate 
-    #                   d. Enter 0 explicitly ignore selections
-    #                   e. None (default) assumes 0 unless default to pareto is true.
-    # @param return_not - [opt default-false] returns the not selected points when there
-    #                   a preference to selecting to certian points. [] if not but set to true.
-    #                   
+    # @param mu - a numpy array of mu values outputed from predict. numpy (n)
+    # @param data - a user defined tuple of data (determined by the predict function of the model)
+    # @param indicies - a list or set of indicies in candidate points to consider.
+    # @param prev_selection - a set of indices of previously selected points
     #
-    # @return [highest_mean, highest_selection, next highest selection, ...],
-    #          selection values for candidate_pts,
-    #          only returns highest mean if "always select best is set"
-    def select(self, candidate_pts, num_alts, prev_selection=[], prefer_pts=None, not_selected=False):
-        prefer_pts = self.get_prefered_set_of_pts(candidate_pts, prefer_pts)
-
+    # @return the index of the greedy selection.
+    def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
         if isinstance(self.model, (PreferenceGP, GP)):
-            mu, variance = self.gp.predict(candidate_pts)
-            UCB = mu + self.alpha*np.sqrt(variance)
+            variance = data
         elif isinstance(self.model, PreferenceLinear):
-            UCB = 1 # TODO
-        else:
-            raise Exception("UCBLearner does not know how to handle model of type: " + str(type(self.model)))
-
-        best_idx = np.argmax(mu)
-
-        selected_idx = self.select_best_k(UCB, num_alts, best_idx, prefer_num)
-
-        return selected_idx, UCB[selected_idx], mu[best_idx]
-
-
-
-    def select_greedy(self, cur_selection, data):
-        mu, variance, cov, prefer_num = data
-
-        best_v = -float('inf')
-        best_i = -1
-
-        exp_v = 1.0 / (len(cur_selection) + 1)
-        for i in [x for x in range(len(mu)) if x not in cur_selection]:
-            vari = variance[i]
-
-            value = (1-self.alpha)*mu[i] + self.alpha*np.sqrt(vari)
+            raise NotImplementedError("Have not implemented UCB with linear preferences")
+        indicies = list(indicies)
 
-            if value > best_v:
-                best_v = value
-                best_i = i
+        selected_UCB = mu[indicies] + self.alpha*np.sqrt(variance[indicies])
 
-        return best_i, best_v
+        return indicies[np.argmax(selected_UCB)]
diff --git a/tests/active_learning/test_UCB_learner.py b/tests/active_learning/test_UCB_learner.py
@@ -0,0 +1,12 @@
+# test_UCB_learner.py
+# Written Ian Rankin - December 2023
+#
+
+import pytest
+
+import numpy as np
+import lop
+
+
+def test_UCB_learner():
+    assert False