diff --git a/src/lop/active_learning/GV_UCBLearner.py b/src/lop/active_learning/GV_UCBLearner.py
index 2fc899d..a510c69 100644
--- a/src/lop/active_learning/GV_UCBLearner.py
+++ b/src/lop/active_learning/GV_UCBLearner.py
@@ -26,6 +26,7 @@
 from lop.active_learning import UCBLearner
 from lop.models import PreferenceGP, GP, PreferenceLinear
+from lop.utilities import metropolis_hastings
 
 
 class GV_UCBLearner(UCBLearner):
     ## select_greedy
@@ -43,7 +44,15 @@ def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
             variance = data
             cov = self.model.cov
         elif isinstance(self.model, PreferenceLinear):
-            raise NotImplementedError("Have not implemented UCB with linear preferences")
+            w_samples = metropolis_hastings(self.model.loss_func, 200, dim=candidate_pts.shape[1])
+
+            w_norm = np.linalg.norm(w_samples, axis=1)
+            w_samples = w_samples / w_norm[:, np.newaxis]
+            # generate possible outputs from weighted samples
+            all_w = (candidate_pts @ w_samples.T).T
+
+            cov = np.cov(all_w.T)
+            variance = np.diagonal(cov)
 
         indicies = list(indicies)
         prev_selection = list(prev_selection)
diff --git a/src/lop/active_learning/MutualInfoLearner.py b/src/lop/active_learning/MutualInfoLearner.py
index 9756b09..523c822 100644
--- a/src/lop/active_learning/MutualInfoLearner.py
+++ b/src/lop/active_learning/MutualInfoLearner.py
@@ -61,19 +61,22 @@ def __init__(self, fake_func=None, default_to_pareto=False, always_select_best=F
     #
     # @return the index of the greedy selection.
     def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
+        indicies = list(indicies)
+        prev_selection = list(prev_selection)
         if isinstance(self.model, (PreferenceGP, GP)):
             variance = data
             cov = self.model.cov
 
-            indicies = list(indicies)
-            prev_selection = list(prev_selection)
-
             # sample M possible parameters w (reward values of the GP)
             all_w = np.random.multivariate_normal(mu, cov, size=self.M)
         elif isinstance(self.model, PreferenceLinear):
             w_samples = metropolis_hastings(self.model.loss_func, self.M, dim=candidate_pts.shape[1])
+
+            w_norm = np.linalg.norm(w_samples, axis=1)
+            w_samples = w_samples / w_norm[:, np.newaxis]
             # generate possible outputs from weighted samples
-            #### TODO HERE
+            all_w = (candidate_pts @ w_samples.T).T
+
         if self.fake_func is not None:
             fake_f_mean = np.mean(self.fake_func(candidate_pts))
             samp_mean = np.mean(all_w)
diff --git a/src/lop/active_learning/UCBLearner.py b/src/lop/active_learning/UCBLearner.py
index 1768a33..8998736 100644
--- a/src/lop/active_learning/UCBLearner.py
+++ b/src/lop/active_learning/UCBLearner.py
@@ -25,6 +25,7 @@
 from lop.active_learning import ActiveLearner
 from lop.models import PreferenceGP, GP, PreferenceLinear
+from lop.utilities import metropolis_hastings
 
 
 class UCBLearner(ActiveLearner):
     ## Constructor
@@ -53,7 +54,14 @@ def select_greedy(self, candidate_pts, mu, data, indicies, prev_selection):
         if isinstance(self.model, (PreferenceGP, GP)):
             variance = data
         elif isinstance(self.model, PreferenceLinear):
-            raise NotImplementedError("Have not implemented UCB with linear preferences")
+            w_samples = metropolis_hastings(self.model.loss_func, 200, dim=candidate_pts.shape[1])
+
+            w_norm = np.linalg.norm(w_samples, axis=1)
+            w_samples = w_samples / w_norm[:, np.newaxis]
+            # generate possible outputs from weighted samples
+            all_w = (candidate_pts @ w_samples.T).T
+
+            variance = np.var(all_w, axis=0)
 
         indicies = list(indicies)
         selected_UCB = mu[indicies] + self.alpha*np.sqrt(variance[indicies])
diff --git a/src/lop/models/PreferenceLinear.py b/src/lop/models/PreferenceLinear.py
index 5cd0089..28968ba 100644
--- a/src/lop/models/PreferenceLinear.py
+++ b/src/lop/models/PreferenceLinear.py
@@ -144,5 +144,8 @@ def derivatives(self, x, y, w):
     # this is equation (139)
     # @param w - the weights of the function
     def loss_func(self, w):
+        if self.X_train is None:
+            return 0
+        w = w / np.linalg.norm(w, ord=2)
         F = (self.X_train @ w[:,np.newaxis])[:,0]
         return self.log_likelyhood_training(F)
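All of the learner hunks above implement the same Monte Carlo pattern for linear preference models: draw weight samples from the model's posterior with metropolis_hastings, normalize each sample to unit length (the linear utility is scale-invariant, which is also why loss_func now normalizes w before computing the likelihood), project the candidate points onto every sample, and read the per-candidate spread off the resulting matrix. Below is a minimal sketch of that shared pattern, assuming metropolis_hastings returns a (num_samples, dim) array of weight samples and loss_func is the bound self.model.loss_func; the helper name sampled_candidate_variance is illustrative, not part of the library.

    import numpy as np
    from lop.utilities import metropolis_hastings

    def sampled_candidate_variance(loss_func, candidate_pts, num_samples=200):
        # draw weight samples w ~ p(w | data), shape (num_samples, dim)
        w_samples = metropolis_hastings(loss_func, num_samples,
                                        dim=candidate_pts.shape[1])
        # only the direction of w matters for a linear utility,
        # so project every sample onto the unit sphere
        w_samples = w_samples / np.linalg.norm(w_samples, axis=1)[:, np.newaxis]
        # score every candidate under every sampled weight vector;
        # rows index samples, columns index candidate points
        all_w = (candidate_pts @ w_samples.T).T
        # per-candidate variance across the weight samples: UCBLearner uses
        # this directly; GV_UCBLearner keeps the full covariance np.cov(all_w.T)
        return np.var(all_w, axis=0)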