Skip to content

Commit

Permalink
Added active learning for linear models
Browse files Browse the repository at this point in the history
  • Loading branch information
ianran committed Jan 19, 2024
1 parent d8f807a commit b74b338
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 6 deletions.
11 changes: 10 additions & 1 deletion src/lop/active_learning/GV_UCBLearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from lop.active_learning import UCBLearner
from lop.models import PreferenceGP, GP, PreferenceLinear
from lop.utilities import metropolis_hastings

class GV_UCBLearner(UCBLearner):
## select_greedy
Expand All @@ -43,7 +44,15 @@ def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
variance = data
cov = self.model.cov
elif isinstance(self.model, PreferenceLinear):
raise NotImplementedError("Have not implemented UCB with linear preferences")
w_samples = metropolis_hastings(self.model.loss_func, 200, dim=candidate_pts.shape[1])

w_norm = np.linalg.norm(w_samples, axis=1)
w_samples = w_samples / np.tile(w_norm, (2,1)).T
# generate possible outputs from weighted samples
all_w = (candidate_pts @ w_samples.T).T

cov = np.cov(all_w.T)
variance = np.diagonal(cov)
indicies = list(indicies)
prev_selection = list(prev_selection)

Expand Down
11 changes: 7 additions & 4 deletions src/lop/active_learning/MutualInfoLearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,22 @@ def __init__(self, fake_func=None, default_to_pareto=False, always_select_best=F
#
# @return the index of the greedy selection.
def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
indicies = list(indicies)
prev_selection = list(prev_selection)
if isinstance(self.model, (PreferenceGP, GP)):
variance = data
cov = self.model.cov

indicies = list(indicies)
prev_selection = list(prev_selection)

# sample M possible parameters w (reward values of the GP)
all_w = np.random.multivariate_normal(mu, cov, size=self.M)
elif isinstance(self.model, PreferenceLinear):
w_samples = metropolis_hastings(self.model.loss_func, self.M, dim=candidate_pts.shape[1])

w_norm = np.linalg.norm(w_samples, axis=1)
w_samples = w_samples / np.tile(w_norm, (2,1)).T
# generate possible outputs from weighted samples
#### TODO HERE
all_w = (candidate_pts @ w_samples.T).T

if self.fake_func is not None:
fake_f_mean = np.mean(self.fake_func(candidate_pts))
samp_mean = np.mean(all_w)
Expand Down
10 changes: 9 additions & 1 deletion src/lop/active_learning/UCBLearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from lop.active_learning import ActiveLearner
from lop.models import PreferenceGP, GP, PreferenceLinear
from lop.utilities import metropolis_hastings

class UCBLearner(ActiveLearner):
## Constructor
Expand Down Expand Up @@ -53,7 +54,14 @@ def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
if isinstance(self.model, (PreferenceGP, GP)):
variance = data
elif isinstance(self.model, PreferenceLinear):
raise NotImplementedError("Have not implemented UCB with linear preferences")
w_samples = metropolis_hastings(self.model.loss_func, 200, dim=candidate_pts.shape[1])

w_norm = np.linalg.norm(w_samples, axis=1)
w_samples = w_samples / np.tile(w_norm, (2,1)).T
# generate possible outputs from weighted samples
all_w = (candidate_pts @ w_samples.T).T

variance = np.var(all_w, axis=0)
indicies = list(indicies)

selected_UCB = mu[indicies] + self.alpha*np.sqrt(variance[indicies])
Expand Down
3 changes: 3 additions & 0 deletions src/lop/models/PreferenceLinear.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,5 +144,8 @@ def derivatives(self, x, y, w):
# this is equation (139)
# @param w - the weights of the function
def loss_func(self, w):
    """Log-likelihood of the training data under reward weights *w*.

    Used as the (unnormalized) target density when sampling weight
    vectors, e.g. via Metropolis-Hastings.

    @param w - weight vector (numpy array) defining the linear reward.

    @return the training log-likelihood, or 0 when no data is present.
    """
    # No training data yet: fall back to a flat (improper) density so
    # that samplers calling this still run.
    if self.X_train is None:
        return 0
    # Only the direction of w matters for a linear preference reward,
    # so evaluate on the unit-norm version of w.
    unit_w = w / np.linalg.norm(w, ord=2)
    # Projected reward of each training point onto the weight direction.
    rewards = (self.X_train @ unit_w[:, np.newaxis])[:, 0]
    return self.log_likelyhood_training(rewards)

0 comments on commit b74b338

Please sign in to comment.