Commit: Added linear preferences, optimization not complete
Showing 5 changed files with 319 additions and 0 deletions.

PreferenceLinear.py
@@ -0,0 +1,120 @@
# Copyright 2023 Ian Rankin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
# to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# PreferenceLinear.py
# Written Ian Rankin - November 2023
#
# A linear latent function to learn the given preferences.

import numpy as np
import sys
# tuple comparison handles future major versions correctly
if sys.version_info >= (3, 3):
    from collections.abc import Sequence
else:
    from collections import Sequence

from rdml_graph.gaussian_process import PreferenceProbit, ProbitBase
from rdml_graph.gaussian_process import k_fold_half, get_dk
from rdml_graph.gaussian_process import PreferenceModel

import pdb

class PreferenceLinear(PreferenceModel):
    ## init function
    # @param pareto_pairs - [opt] sets whether to assume pareto optimal user preferences.
    # @param other_probits - [opt] sets additional types of probits to add.
    def __init__(self, pareto_pairs=False, other_probits={}):
        super(PreferenceLinear, self).__init__(pareto_pairs, other_probits)

        self.probits = [PreferenceProbit(sigma=1.0)]

        self.lambda_newton = 0.3
        for key in other_probits:
            if not isinstance(other_probits[key], ProbitBase):
                raise TypeError("PreferenceLinear passed a probit that is not a probit: " + str(other_probits[key]))

            self.probits.append(other_probits[key])

    ## P_w
    # Log probability of w given the training data.
    def log_P_w(self, w):
        # project the training samples onto the latent space: F = X @ w
        # (mirrors the projection used in derivatives below)
        F = (self.X_train @ w[:, np.newaxis])[:, 0]

        log_p_w = 0.0
        for j, probit in enumerate(self.probits):
            if self.y_train[j] is not None:
                p_w_local = probit.log_likelihood(self.y_train[j], F)

                log_p_w += p_w_local

        return log_p_w

    ## derivatives
    # Calculates the derivatives for all of the given probits.
    # @param x - the input data samples
    # @param y - the given set of labels for the probits
    #            given as a list of [(dk, u, v), ...]
    # @param w - the linear weight vector
    #
    # @return - W, dpy_dw, py
    #       W - the second order derivative of the probits, mapped into w-space
    #       dpy_dw - the derivative of log P(y|x,theta) with respect to w
    #       py - log P(y|x,theta) summed over the given probits
    def derivatives(self, x, y, w):
        F = (x @ w[:, np.newaxis])[:, 0]

        W = np.zeros((len(F), len(F)))
        grad_ll = np.zeros(len(F))
        log_likelihood = 0
        for j, probit in enumerate(self.probits):
            if self.y_train[j] is not None:
                W_local, dpy_df_local, py_local = probit.derivatives(y[j], F)

                W += W_local
                grad_ll += dpy_df_local
                log_likelihood += py_local

        # chain rule: dl/dw = (dl/dF) @ dF/dw, with dF/dw = x for F = x @ w
        grad_ll = (grad_ll[np.newaxis, :] @ x)[0]
        W = x.T @ W @ x

        return W, grad_ll, log_likelihood

    ## optimize
    # Runs the optimization step required by the user preference GP.
    # @param optimize_hyperparameter - [opt] sets whether to optimize the hyperparameters
    def optimize(self, optimize_hyperparameter=False):
        if len(self.X_train.shape) > 1:
            self.w = np.random.random(self.X_train.shape[1])
        else:
            print('Only 1 reward parameter... a linear model makes little practical sense')
            self.w = np.random.random(1)

        # just do gradient descent.
        # NOTE: only the first derivative evaluation is here so far; the
        # descent loop itself is still to come (see commit message).
        W, dpy_dw, py = self.derivatives(self.X_train, self.y_train, self.w)

        pdb.set_trace()

        self.optimized = True

test_user_GP.py
@@ -0,0 +1,187 @@
# test_user_GP.py
# Written Ian Rankin - October 2022
#
# A set of tests for the user preferences.

import pytest

import numpy as np
import rdml_graph as gr
import tqdm


def f_sin(x, data=None):
    return 2 * np.cos(np.pi * (x - 2)) * np.exp(-(0.9 * x))

def f_lin(x, data=None):
    #return x[:,0]*x[:,1]
    return x[:, 0] + x[:, 1]

def f_sq(x, data=None):
    return x[:, 0] * x[:, 0] + 1.2 * x[:, 1]

def test_user_gp():
    X_train = np.array([[0, 0], [1, 2], [2, 4], [3, 2], [4.2, 5.6], [6, 2], [7, 8]])
    pairs = gr.generate_fake_pairs(X_train, f_lin, 0) + \
            gr.generate_fake_pairs(X_train, f_lin, 1) + \
            gr.generate_fake_pairs(X_train, f_lin, 2) + \
            gr.generate_fake_pairs(X_train, f_lin, 3) + \
            gr.generate_fake_pairs(X_train, f_lin, 4)

    gp = gr.PreferenceLinear()
    #gp = gr.PreferenceGP(gr.periodic_kern(1.2,0.3,5))
    #gp = gr.PreferenceGP(gr.linear_kern(0.2, 0.2, 0.2))
    #gp = gr.PreferenceGP(gr.RBF_kern(0.2,1)+gr.periodic_kern(1,0.2,0)+gr.linear_kern(0.2,0.1,0.3))
    #gp = gr.PreferenceGP(gr.RBF_kern(0.1,1)*gr.linear_kern(0.3,0.2,0.3))

    gp.add(X_train, pairs)

    gp.optimize(optimize_hyperparameter=False)
    #print('gp.calc_ll()')
    #print(gp.calc_ll())

    X = np.arange(-0.5, 8, 0.1)
    mu, sigma = gp.predict(X)
    std = np.sqrt(sigma)

    y, sigma = gp.predict(X_train)

    for i in range(len(X_train)):
        if i != 0:
            assert y[0] > y[i]
        if i != 1:
            assert y[1] < y[i]

# def user_gp_active_func(utility_f):
#     num_side = 25
#     bounds = [(0,7), (0,7)]

#     num_train_pts = 40
#     num_alts = 4

#     #gp = gr.PreferenceGP(gr.RBF_kern(0.2,0.5)*gr.linear_kern(0.2, 0.1, 0))
#     #gp = gr.PreferenceGP(gr.linear_kern(0.3, 0.1, 0.0))
#     gp = gr.PreferenceGP(gr.RBF_kern(1.0, 1.0), pareto_pairs=True, \
#                          use_hyper_optimization=False, \
#                          active_learner = gr.DetLearner(1.0))
#     gp.add_prior(bounds=np.array(bounds), num_pts=20)

#     for i in tqdm.tqdm(range(10)):
#         train_X = np.random.random((num_train_pts,2)) * np.array([bounds[0][1]-bounds[0][0], bounds[1][1]-bounds[1][0]]) + np.array([bounds[0][0], bounds[1][0]])
#         train_Y = utility_f(train_X)  #f_lin(train_X)

#         #pdb.set_trace()
#         selected_idx, UCB, best_value = gp.select(train_X, num_alts)
#         #selected_idx = gp.active_learner.select_previous(train_X, num_alts=num_alts)

#         best_idx = np.argmax(train_Y[selected_idx])

#         pairs = gr.ranked_pairs_from_fake(train_X[selected_idx], utility_f)

#         print(pairs)
#         print(train_Y[selected_idx])
#         print(train_X[selected_idx])

#         gp.add(train_X[selected_idx], pairs)

#         gp.optimize(optimize_hyperparameter=False)

#     x = np.linspace(bounds[0][0], bounds[0][1], num_side)
#     y = np.linspace(bounds[1][0], bounds[1][1], num_side)

#     X, Y = np.meshgrid(x, y)
#     points = np.vstack([X.ravel(), Y.ravel()]).transpose()
#     z = utility_f(points)
#     z_norm = np.linalg.norm(z, ord=np.inf)
#     z = z / z_norm
#     Z = np.reshape(z, (num_side, num_side))

#     z_predicted, z_sigma = gp.predict(points)
#     ucb_pred = z_predicted + np.sqrt(z_sigma)*1
#     Z_pred = np.reshape(z_predicted, (num_side, num_side))
#     UCB_pred = np.reshape(ucb_pred, (num_side, num_side))

#     assert UCB_pred[-1,-1] > UCB_pred[0,0]
#     assert UCB_pred[-2,-2] > UCB_pred[0,0]
#     assert UCB_pred[-3,-3] > UCB_pred[0,0]


# def test_user_gp_active_sq():
#     user_gp_active_func(f_sq)

# def test_user_gp_active_lin():
#     user_gp_active_func(f_lin)


# def test_abs_gp_user():
#     X_train = np.array([0.4, 0.7, 0.9, 1.1, 1.2, 1.35, 1.4])
#     abs_values = np.array([0.999, 0.6, 0.3, 0.2, 0.22, 0.4, 0.5])
#     #abs_values = np.array([0.4, 0.2, 0.2, 0.2, 0.1, 0.11, 0.3])

#     gp = gr.PreferenceGP(gr.RBF_kern(0.3, 0.25), normalize_gp=True, \
#                          normalize_positive=True, \
#                          pareto_pairs=True, \
#                          other_probits={'abs': gr.AbsBoundProbit(1.0,10.0)})

#     X_train = X_train[:, np.newaxis]
#     gp.add(X_train[0:3], abs_values[0:3], type='abs')
#     gp.add(np.array([[0.6]]), [])

#     gp.add(X_train[3:], abs_values[3:], type='abs')

#     step = 0.02
#     X = np.arange(0.0, 1.5, step)
#     mu, sigma = gp.predict(X)
#     std = np.sqrt(sigma)

#     pre = 0.3
#     assert mu[int(0.4/step)] < 1 + pre
#     assert mu[int(0.4/step)] > 1 - pre
#     assert mu[int(0.95/step)] < 0 + pre
#     assert mu[int(0.95/step)] > 0 - pre
#     assert mu[int(1.4/step)] < 0.5 + pre
#     assert mu[int(1.4/step)] > 0.5 - pre


# def test_user_gp_ordinal():
#     X_train = np.array([0, 1, 2, 3, 4.2, 6, 7])
#     ratings = np.array([5, 5, 2, 1, 2, 3, 3])

#     gp = gr.PreferenceGP(gr.RBF_kern(0.5, 0.7), \
#                          other_probits={'ordinal': gr.OrdinalProbit(2.0,1.0, n_ordinals=5)})
#     #gp = gr.PreferenceGP(gr.periodic_kern(1.2,0.3,5))
#     #gp = gr.PreferenceGP(gr.linear_kern(0.2, 0.2, 0.2))
#     #gp = gr.PreferenceGP(gr.RBF_kern(0.2,1)+gr.periodic_kern(1,0.2,0)+gr.linear_kern(0.2,0.1,0.3))
#     #gp = gr.PreferenceGP(gr.RBF_kern(0.1,1)*gr.linear_kern(0.3,0.2,0.3))

#     gp.add(X_train, ratings, type='ordinal')

#     #gp.optimize(optimize_hyperparameter=True)
#     #print('gp.calc_ll()')
#     #print(gp.calc_ll())

#     step = 0.1
#     X = np.arange(0, 8, step)
#     mu, sigma = gp.predict(X)
#     std = np.sqrt(sigma)

#     assert mu[int(0.5/step)] > mu[int(3/step)]
#     assert mu[int(6.5/step)] > mu[int(3/step)]
#     assert mu[int(0.5/step)] > mu[int(6.5/step)]
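
The tests above build preference labels with gr.generate_fake_pairs, which feeds the [(dk, u, v), ...] format named in the PreferenceLinear.derivatives docstring. As a rough illustration of that data format only, here is a hypothetical pair generator; the function name and the dk sign convention are assumptions, not rdml_graph's actual implementation.

import numpy as np

# Hypothetical sketch: compare point `idx` against every other point under a
# utility function and emit (dk, u, v) preference tuples. Illustration of the
# label format only; not rdml_graph.generate_fake_pairs.
def fake_pairs(X, utility_f, idx):
    y = utility_f(X)
    pairs = []
    for i in range(len(X)):
        if i == idx:
            continue
        dk = 1 if y[idx] > y[i] else -1   # assumed sign convention
        pairs.append((dk, idx, i))
    return pairs

X_train = np.array([[0, 0], [1, 2], [2, 4]])
print(fake_pairs(X_train, lambda x: x[:, 0] + x[:, 1], 0))
# prints [(-1, 0, 1), (-1, 0, 2)] under the assumed convention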