From 3d46b3d6412cda68e66a619733c7f32b8e675c2f Mon Sep 17 00:00:00 2001 From: ianran Date: Wed, 15 Nov 2023 17:36:33 -0800 Subject: [PATCH] Added linear preferences, optimization not complete --- .../gaussian_process/PreferenceGP.py | 3 + .../gaussian_process/PreferenceLinear.py | 120 +++++++++++ .../gaussian_process/PreferenceModel.py | 8 + src/rdml_graph/gaussian_process/__init__.py | 1 + tests/GaussianProcess/test_user_linear.py | 187 ++++++++++++++++++ 5 files changed, 319 insertions(+) create mode 100644 src/rdml_graph/gaussian_process/PreferenceLinear.py create mode 100644 tests/GaussianProcess/test_user_linear.py diff --git a/src/rdml_graph/gaussian_process/PreferenceGP.py b/src/rdml_graph/gaussian_process/PreferenceGP.py index 488e9ef..9c66fb3 100644 --- a/src/rdml_graph/gaussian_process/PreferenceGP.py +++ b/src/rdml_graph/gaussian_process/PreferenceGP.py @@ -78,6 +78,9 @@ def __init__(self, cov_func, normalize_gp=True, pareto_pairs=False, \ for key in other_probits: + if not isinstance(other_probits[key], ProbitBase): + raise TypeError("Preference Linear pased a probit that is not a probit: " + str(other_probits[key])) + self.probits.append(other_probits[key]) diff --git a/src/rdml_graph/gaussian_process/PreferenceLinear.py b/src/rdml_graph/gaussian_process/PreferenceLinear.py new file mode 100644 index 0000000..72dd238 --- /dev/null +++ b/src/rdml_graph/gaussian_process/PreferenceLinear.py @@ -0,0 +1,120 @@ +# Copyright 2023 Ian Rankin +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +# to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included 
in all copies or +# substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# PreferenceLinear.py +# Written Ian Rankin - November 2023 +# +# A linear latent function to learn the given preferences. + +import numpy as np +import sys +if sys.version_info[0] >= 3 and sys.version_info[1] >= 3: + from collections.abc import Sequence +else: + from collections import Sequence + +from rdml_graph.gaussian_process import PreferenceProbit, ProbitBase +from rdml_graph.gaussian_process import k_fold_half, get_dk +from rdml_graph.gaussian_process import PreferenceModel + +import pdb + +class PreferenceLinear(PreferenceModel): + ## init function + # @param pareto_pairs - [opt] sets whether to assume pareto optimal user preferences. + # @param other_probits - [opt] sets additional types of probits to add. 
+    def __init__(self, pareto_pairs=False, other_probits=None):
+        # A None default avoids sharing one mutable dict between all instances.
+        if other_probits is None:
+            other_probits = {}
+
+        super(PreferenceLinear, self).__init__(pareto_pairs, other_probits)
+
+        # The preference probit is always the first entry of self.probits,
+        # any additional probits are appended after it.
+        self.probits = [PreferenceProbit(sigma=1.0)]
+
+        # NOTE(review): presumably the step size of the weight update, it is
+        # not read anywhere in this file yet.
+        self.lambda_newton = 0.3
+        for probit in other_probits.values():
+            if not isinstance(probit, ProbitBase):
+                raise TypeError("PreferenceLinear passed a probit that is not a probit: " + str(probit))
+
+            self.probits.append(probit)
+
+
+
+    ## _latent
+    # Evaluates the linear latent function F = x w.
+    # @param x - the input samples (n, d), or (n,) when there is a single feature
+    # @param w - the weight vector (d,)
+    #
+    # @return - x2d, F
+    #       x2d - x as a 2D (n, d) array
+    #       F - the latent value of every sample (n,)
+    def _latent(self, x, w):
+        x = np.asarray(x)
+        if x.ndim == 1:
+            # optimize() creates a single weight for 1D training data, so
+            # every entry is one sample with one feature.
+            x = x[:, np.newaxis]
+
+        return x, x @ np.asarray(w)
+
+
+    ## log_P_w
+    # Log likelihood of the training labels for the given weights.
+    # @param w - the weight vector of the linear latent function (d,)
+    #
+    # @return - the sum of probit.log_likelihood(y_train[j], F) over every
+    #           probit j that has training labels, with F = X_train w
+    def log_P_w(self, w):
+        _, F = self._latent(self.X_train, w)
+
+        log_p_w = 0.0
+        for j, probit in enumerate(self.probits):
+            if self.y_train[j] is not None:
+                log_p_w += probit.log_likelihood(self.y_train[j], F)
+
+        return log_p_w
+
+
+    ## derivatives
+    # Calculates the derivatives of all of the given probits with respect to w.
+    # @param x - the input data samples (n, d)
+    # @param y - the labels, indexed by probit: y[j] is handed to probit j
+    #           and entries that are None are skipped
+    # @param w - the weight vector of the linear latent function (d,)
+    #
+    # @return - W, grad_ll, log_likelihood
+    #       W - the summed second order terms returned by the probits (with
+    #           respect to F), mapped onto the weights as x^T W x (d, d)
+    #       grad_ll - the derivative of log P(y|x,w) with respect to w (d,)
+    #       log_likelihood - log P(y|x,w) summed over the probits
+    def derivatives(self, x, y, w):
+        x, F = self._latent(x, w)
+
+        W = np.zeros((len(F), len(F)))
+        grad_ll = np.zeros(len(F))
+        log_likelihood = 0.0
+        for j, probit in enumerate(self.probits):
+            # test the labels that were passed in, not self.y_train
+            if y[j] is not None:
+                W_local, dpy_df_local, py_local = probit.derivatives(y[j], F)
+
+                W += W_local
+                grad_ll += dpy_df_local
+                log_likelihood += py_local
+
+        # chain rule: dl/dw = dl/dF * dF/dw, and dF/dw = x
+        grad_ll = grad_ll @ x
+        W = x.T @ W @ x
+
+        return W, grad_ll, log_likelihood
+
+    ## optimize
+    # Runs the optimization step required by the user preference GP.
+    # @param optimize_hyperparameter - [opt] sets whether to optimize the hyperparameters
+    #           (accepted to match the PreferenceModel interface, not read here)
+    def optimize(self, optimize_hyperparameter=False):
+        # start from random weights, one per feature column of the training data
+        if len(self.X_train.shape) > 1:
+            self.w = np.random.random(self.X_train.shape[1])
+        else:
+            print('Only 1 reward parameter... linear model practically does not make sense')
+            self.w = np.random.random(1)
+
+        # TODO: the gradient descent loop is not implemented yet, self.w keeps
+        # its random starting value. The derivatives are still evaluated once
+        # so that malformed training data fails here and not in a later call.
+        self.derivatives(self.X_train, self.y_train, self.w)
+
+        self.optimized = True
+
+
+
+
diff --git a/src/rdml_graph/gaussian_process/PreferenceModel.py b/src/rdml_graph/gaussian_process/PreferenceModel.py index c6c17d6..3523ffe 100644 --- a/src/rdml_graph/gaussian_process/PreferenceModel.py +++ b/src/rdml_graph/gaussian_process/PreferenceModel.py @@ -237,3 +237,10 @@ def add(self, X, y, type='relative_discrete', training_sigma=0): self.optimized = False + + ## optimize + # Runs the optimization step required by the user preference GP. + # @param optimize_hyperparameter - [opt] sets whether to optimize the hyperparameters + def optimize(self, optimize_hyperparameter=False): + raise NotImplementedError("PreferenceModel optimize function not implemented") + diff --git a/src/rdml_graph/gaussian_process/__init__.py b/src/rdml_graph/gaussian_process/__init__.py index bde89e5..6ba07b6 100644 --- a/src/rdml_graph/gaussian_process/__init__.py +++ b/src/rdml_graph/gaussian_process/__init__.py @@ -8,6 +8,7 @@ from .OrdinalProbit import OrdinalProbit from .AbsBoundProbit import AbsBoundProbit from .PreferenceGP import PreferenceGP +from .PreferenceLinear import PreferenceLinear from .HumanChoiceModel import p_human_choice, sample_human_choice from .MutualInformationLearner import MutualInformationLearner from .MutualUCBLearner import MutualUCBLearner diff --git a/tests/GaussianProcess/test_user_linear.py b/tests/GaussianProcess/test_user_linear.py new file mode 100644 index 0000000..0116b51 --- /dev/null +++ 
b/tests/GaussianProcess/test_user_linear.py
@@ -0,0 +1,182 @@
+# test_user_linear.py
+# Written Ian Rankin - October 2022
+#
+# A set of tests for the linear user preference model.
+
+import pytest
+
+import numpy as np
+import rdml_graph as gr
+import tqdm
+
+
+# Damped cosine utility, evaluated element-wise on x.
+def f_sin(x, data=None):
+    return 2 * np.cos(np.pi * (x-2)) * np.exp(-(0.9*x))
+
+# Linear utility: the sum of the first two feature columns of x.
+def f_lin(x, data=None):
+    #return x[:,0]*x[:,1]
+    return x[:,0]+x[:,1]
+
+# Utility that is quadratic in feature 0 and linear in feature 1.
+def f_sq(x, data=None):
+    return x[:,0]*x[:,0] + 1.2*x[:,1]
+
+
+# Trains the linear model on f_lin comparisons and checks that the learned
+# utilities reproduce every comparison that was part of the training data.
+def test_user_gp():
+    X_train = np.array([[0,0],[1,2],[2,4],[3,2],[4.2, 5.6],[6,2],[7,8]])
+    # presumably generate_fake_pairs(X, f, i) compares point i with the rest
+    # of X under f (TODO confirm); the first num_ref points are used this way
+    num_ref = 5
+    pairs = gr.generate_fake_pairs(X_train, f_lin, 0)
+    for i in range(1, num_ref):
+        pairs = pairs + gr.generate_fake_pairs(X_train, f_lin, i)
+
+    gp = gr.PreferenceLinear()
+    gp.add(X_train, pairs)
+    gp.optimize(optimize_hyperparameter=False)
+
+    y, sigma = gp.predict(X_train)
+
+    # f_lin ranks [0,0] lowest and [7,8] highest, so the expected order is
+    # taken from f_lin itself instead of being hard coded per index.
+    truth = f_lin(X_train)
+    for i in range(num_ref):
+        for j in range(len(X_train)):
+            if truth[i] > truth[j]:
+                assert y[i] > y[j]
+            elif truth[i] < truth[j]:
+                assert y[i] < y[j]
+
+
+
+# def user_gp_active_func(utility_f): +# num_side = 25 +# bounds = [(0,7), (0,7)] + +# num_train_pts = 40 +# num_alts = 4 + + +# #gp = gr.PreferenceGP(gr.RBF_kern(0.2,0.5)*gr.linear_kern(0.2, 0.1, 0)) +# #gp = gr.PreferenceGP(gr.linear_kern(0.3, 0.1, 0.0)) +# gp = gr.PreferenceGP(gr.RBF_kern(1.0, 1.0), pareto_pairs=True, \ +# use_hyper_optimization=False, \ +# active_learner = gr.DetLearner(1.0)) +# gp.add_prior(bounds=np.array(bounds), num_pts=20) + + + +# for i in tqdm.tqdm(range(10)): +# train_X = 
np.random.random((num_train_pts,2)) * np.array([bounds[0][1]-bounds[0][0], bounds[1][1]-bounds[1][0]]) + np.array([bounds[0][0], bounds[1][0]]) +# train_Y = utility_f(train_X)#f_lin(train_X) + +# #pdb.set_trace() +# selected_idx, UCB, best_value = gp.select(train_X, num_alts) +# #selected_idx = gp.active_learner.select_previous(train_X, num_alts=num_alts) + +# best_idx = np.argmax(train_Y[selected_idx]) + +# pairs = gr.ranked_pairs_from_fake(train_X[selected_idx], utility_f) + +# print(pairs) +# print(train_Y[selected_idx]) +# print(train_X[selected_idx]) + + +# gp.add(train_X[selected_idx], pairs) + +# gp.optimize(optimize_hyperparameter=False) + +# x = np.linspace(bounds[0][0], bounds[0][1], num_side) +# y = np.linspace(bounds[1][0], bounds[1][1], num_side) + +# X, Y = np.meshgrid(x,y) +# points = np.vstack([X.ravel(), Y.ravel()]).transpose() +# z = utility_f(points) +# z_norm = np.linalg.norm(z, ord=np.inf) +# z = z / z_norm +# Z = np.reshape(z, (num_side, num_side)) + +# z_predicted, z_sigma = gp.predict(points) +# ucb_pred = z_predicted + np.sqrt(z_sigma)*1 +# Z_pred = np.reshape(z_predicted, (num_side, num_side)) +# UCB_pred = np.reshape(ucb_pred, (num_side, num_side)) + + +# assert UCB_pred[-1,-1] > UCB_pred[0,0] +# assert UCB_pred[-2,-2] > UCB_pred[0,0] +# assert UCB_pred[-3,-3] > UCB_pred[0,0] + + + +# def test_user_gp_active_sq(): +# user_gp_active_func(f_sq) + +# def test_user_gp_active_lin(): +# user_gp_active_func(f_lin) + + + + +# def test_abs_gp_user(): +# X_train = np.array([0.4, 0.7, 0.9, 1.1, 1.2, 1.35, 1.4]) +# abs_values = np.array([0.999, 0.6, 0.3, 0.2, 0.22, 0.4, 0.5]) +# #abs_values = np.array([0.4, 0.2, 0.2, 0.2, 0.1, 0.11, 0.3]) + + +# gp = gr.PreferenceGP(gr.RBF_kern(0.3, 0.25), normalize_gp=True, \ +# normalize_positive=True, \ +# pareto_pairs=True, \ +# other_probits={'abs': gr.AbsBoundProbit(1.0,10.0)}) + +# X_train = X_train[:, np.newaxis] +# gp.add(X_train[0:3], abs_values[0:3], type='abs') +# gp.add(np.array([[0.6]]), []) + +# 
gp.add(X_train[3:], abs_values[3:], type='abs') + +# step = 0.02 +# X = np.arange(0.0, 1.5, step) +# mu, sigma = gp.predict(X) +# std = np.sqrt(sigma) + +# pre = 0.3 +# assert mu[int(0.4/step)] < 1 + pre +# assert mu[int(0.4/step)] > 1 - pre +# assert mu[int(0.95/step)] < 0 + pre +# assert mu[int(0.95/step)] > 0 - pre +# assert mu[int(1.4/step)] < 0.5 + pre +# assert mu[int(1.4/step)] > 0.5 - pre + + + +# def test_user_gp_ordinal(): +# X_train = np.array([0,1,2,3,4.2,6,7]) +# ratings = np.array([5,5,2,1,2 ,3,3]) + + +# gp = gr.PreferenceGP(gr.RBF_kern(0.5, 0.7), \ +# other_probits={'ordinal': gr.OrdinalProbit(2.0,1.0, n_ordinals=5)}) +# #gp = gr.PreferenceGP(gr.periodic_kern(1.2,0.3,5)) +# #gp = gr.PreferenceGP(gr.linear_kern(0.2, 0.2, 0.2)) +# #gp = gr.PreferenceGP(gr.RBF_kern(0.2,1)+gr.periodic_kern(1,0.2,0)+gr.linear_kern(0.2,0.1,0.3)) +# #gp = gr.PreferenceGP(gr.RBF_kern(0.1,1)*gr.linear_kern(0.3,0.2,0.3)) + +# gp.add(X_train, ratings, type='ordinal') + +# #gp.optimize(optimize_hyperparameter=True) +# #print('gp.calc_ll()') +# #print(gp.calc_ll()) + +# step = 0.1 +# X = np.arange(0, 8, step) +# mu, sigma = gp.predict(X) +# std = np.sqrt(sigma) + +# assert mu[int(0.5/step)] > mu[int(3/step)] +# assert mu[int(6.5/step)] > mu[int(3/step)] +# assert mu[int(0.5/step)] > mu[int(6.5/step)] +