Added linear preferences, optimization not complete
ianran committed Nov 16, 2023
1 parent b7ff28e commit 3d46b3d
Showing 5 changed files with 319 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/rdml_graph/gaussian_process/PreferenceGP.py
@@ -78,6 +78,9 @@ def __init__(self, cov_func, normalize_gp=True, pareto_pairs=False, \


        for key in other_probits:
            if not isinstance(other_probits[key], ProbitBase):
                raise TypeError("PreferenceGP was passed an object that is not a ProbitBase: " + str(other_probits[key]))

            self.probits.append(other_probits[key])


120 changes: 120 additions & 0 deletions src/rdml_graph/gaussian_process/PreferenceLinear.py
@@ -0,0 +1,120 @@
# Copyright 2023 Ian Rankin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
# to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# PreferenceLinear.py
# Written Ian Rankin - November 2023
#
# A linear latent function to learn the given preferences.
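#
# Minimal usage sketch (mirrors tests/GaussianProcess/test_user_linear.py from this
# commit; X_query is a placeholder name, and predict() is assumed to come from the
# shared PreferenceModel interface rather than this file):
#   gp = PreferenceLinear()
#   gp.add(X_train, pairs)                      # pairs of preference tuples, e.g. [(dk, u, v), ...]
#   gp.optimize(optimize_hyperparameter=False)  # NOTE: optimization is not complete yet
#   mu, sigma = gp.predict(X_query)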

import numpy as np
import sys
if sys.version_info >= (3, 3):
    from collections.abc import Sequence
else:
    from collections import Sequence

from rdml_graph.gaussian_process import PreferenceProbit, ProbitBase
from rdml_graph.gaussian_process import k_fold_half, get_dk
from rdml_graph.gaussian_process import PreferenceModel

import pdb

class PreferenceLinear(PreferenceModel):
    ## init function
    # @param pareto_pairs - [opt] sets whether to assume pareto optimal user preferences.
    # @param other_probits - [opt] sets additional types of probits to add.
    def __init__(self, pareto_pairs=False, other_probits={}):
        super(PreferenceLinear, self).__init__(pareto_pairs, other_probits)


        self.probits = [PreferenceProbit(sigma = 1.0)]

        self.lambda_newton = 0.3
        for key in other_probits:
            if not isinstance(other_probits[key], ProbitBase):
                raise TypeError("PreferenceLinear was passed an object that is not a ProbitBase: " + str(other_probits[key]))

            self.probits.append(other_probits[key])
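
    # Example of passing extra probits (a sketch only; AbsBoundProbit and its
    # arguments are taken from the commented-out tests in test_user_linear.py and
    # have not been exercised with the linear model in this commit):
    #   from rdml_graph.gaussian_process import AbsBoundProbit
    #   gp = PreferenceLinear(other_probits={'abs': AbsBoundProbit(1.0, 10.0)})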



    ## log_P_w
    # Log probability of w given the training data
    # @param w - the linear weight vector of the model
    def log_P_w(self, w):
        # latent utilities of the training points under the linear model F = Xw
        F = (self.X_train @ w[:,np.newaxis])[:,0]

        log_p_w = 0.0
        for j, probit in enumerate(self.probits):
            if self.y_train[j] is not None:
                p_w_local = probit.log_likelihood(self.y_train[j], F)

                log_p_w += p_w_local

        return log_p_w


    ## derivatives
    # Calculates the derivatives for all of the given probits.
    # @param x - the input data samples
    # @param y - the given set of labels for the probits
    #            this is given as a list of [(dk, u, v), ...]
    # @param w - the linear weight vector of the model
    #
    # @return - W, dpy_dw, py
    #       W - the second order derivative of log P(y|x,theta) with respect to w
    #       dpy_dw - the derivative of log P(y|x,theta) with respect to w
    #       py - log P(y|x,theta) for the given probits
    def derivatives(self, x, y, w):
        # latent utilities of the samples under the linear model F = xw
        F = (x @ w[:,np.newaxis])[:,0]

        W = np.zeros((len(F), len(F)))
        grad_ll = np.zeros(len(F))
        log_likelihood = 0
        for j, probit in enumerate(self.probits):
            if self.y_train[j] is not None:
                W_local, dpy_df_local, py_local = probit.derivatives(y[j], F)

                W += W_local
                grad_ll += dpy_df_local
                log_likelihood += py_local


        # map the derivatives from F-space to w-space with the chain rule
        # dl/dw = (dl/dF)(dF/dw), where dF/dw = x since F = xw
        grad_ll = (grad_ll[np.newaxis,:] @ x)[0]
        W = x.T @ W @ x

        return W, grad_ll, log_likelihood
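
    # Shape note for derivatives(), with n = number of samples and d = feature dimension:
    #   x is (n, d) and w is (d,), so F = x @ w has shape (n,).
    #   Each probit returns W_local of shape (n, n) and dpy_df_local of shape (n,).
    #   The chain rule through F = xw then gives
    #       grad_ll = x.T @ dpy_df  -> shape (d,)   (gradient with respect to w)
    #       W       = x.T @ W @ x   -> shape (d, d) (second derivative with respect to w)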

    ## optimize
    # Runs the optimization step required by the user preference GP.
    # @param optimize_hyperparameter - [opt] sets whether to optimize the hyperparameters
    def optimize(self, optimize_hyperparameter=False):
        if len(self.X_train.shape) > 1:
            self.w = np.random.random(self.X_train.shape[1])
        else:
            print('Only 1 reward parameter... linear model practically does not make sense')
            self.w = np.random.random(1)


        # just do gradient descent.
        W, dpy_dw, py = self.derivatives(self.X_train, self.y_train, self.w)

        # temporary debug breakpoint; the update loop has not been implemented yet
        pdb.set_trace()
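
        # A possible shape for the missing update loop, sketched as a comment only
        # (n_iterations, using self.lambda_newton as a step size, and the unit-norm
        # projection are assumptions, not something this commit implements):
        #   for _ in range(n_iterations):
        #       W, dpy_dw, py = self.derivatives(self.X_train, self.y_train, self.w)
        #       self.w = self.w + self.lambda_newton * dpy_dw   # gradient ascent on the log likelihood
        #       self.w = self.w / np.linalg.norm(self.w)        # keep w at unit norm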

        self.optimized = True




8 changes: 8 additions & 0 deletions src/rdml_graph/gaussian_process/PreferenceModel.py
@@ -237,3 +237,11 @@ def add(self, X, y, type='relative_discrete', training_sigma=0):
        self.optimized = False



    ## optimize
    # Runs the optimization step required by the user preference GP.
    # @param optimize_hyperparameter - [opt] sets whether to optimize the hyperparameters
    def optimize(self, optimize_hyperparameter=False):
        raise NotImplementedError("PreferenceModel optimize function not implemented")
        self.optimized = True

1 change: 1 addition & 0 deletions src/rdml_graph/gaussian_process/__init__.py
@@ -8,6 +8,7 @@
from .OrdinalProbit import OrdinalProbit
from .AbsBoundProbit import AbsBoundProbit
from .PreferenceGP import PreferenceGP
from .PreferenceLinear import PreferenceLinear
from .HumanChoiceModel import p_human_choice, sample_human_choice
from .MutualInformationLearner import MutualInformationLearner
from .MutualUCBLearner import MutualUCBLearner
187 changes: 187 additions & 0 deletions tests/GaussianProcess/test_user_linear.py
@@ -0,0 +1,187 @@
# test_user_linear.py
# Written Ian Rankin - October 2022
#
# A set of tests for the user preferences.

import pytest

import numpy as np
import rdml_graph as gr
import tqdm


def f_sin(x, data=None):
    return 2 * np.cos(np.pi * (x-2)) * np.exp(-(0.9*x))

def f_lin(x, data=None):
    #return x[:,0]*x[:,1]
    return x[:,0]+x[:,1]

def f_sq(x, data=None):
    return x[:,0]*x[:,0] + 1.2*x[:,1]


def test_user_gp():
    X_train = np.array([[0,0],[1,2],[2,4],[3,2],[4.2, 5.6],[6,2],[7,8]])
    pairs = gr.generate_fake_pairs(X_train, f_lin, 0) + \
            gr.generate_fake_pairs(X_train, f_lin, 1) + \
            gr.generate_fake_pairs(X_train, f_lin, 2) + \
            gr.generate_fake_pairs(X_train, f_lin, 3) + \
            gr.generate_fake_pairs(X_train, f_lin, 4)


    gp = gr.PreferenceLinear()
    #gp = gr.PreferenceGP(gr.periodic_kern(1.2,0.3,5))
    #gp = gr.PreferenceGP(gr.linear_kern(0.2, 0.2, 0.2))
    #gp = gr.PreferenceGP(gr.RBF_kern(0.2,1)+gr.periodic_kern(1,0.2,0)+gr.linear_kern(0.2,0.1,0.3))
    #gp = gr.PreferenceGP(gr.RBF_kern(0.1,1)*gr.linear_kern(0.3,0.2,0.3))

    gp.add(X_train, pairs)

    gp.optimize(optimize_hyperparameter=False)
    #print('gp.calc_ll()')
    #print(gp.calc_ll())


    X = np.arange(-0.5, 8, 0.1)
    mu, sigma = gp.predict(X)
    std = np.sqrt(sigma)

    y, sigma = gp.predict(X_train)

    for i in range(len(X_train)):
        if i != 0:
            assert y[0] > y[i]
        if i != 1:
            assert y[1] < y[i]



# def user_gp_active_func(utility_f):
# num_side = 25
# bounds = [(0,7), (0,7)]

# num_train_pts = 40
# num_alts = 4


# #gp = gr.PreferenceGP(gr.RBF_kern(0.2,0.5)*gr.linear_kern(0.2, 0.1, 0))
# #gp = gr.PreferenceGP(gr.linear_kern(0.3, 0.1, 0.0))
# gp = gr.PreferenceGP(gr.RBF_kern(1.0, 1.0), pareto_pairs=True, \
# use_hyper_optimization=False, \
# active_learner = gr.DetLearner(1.0))
# gp.add_prior(bounds=np.array(bounds), num_pts=20)



# for i in tqdm.tqdm(range(10)):
# train_X = np.random.random((num_train_pts,2)) * np.array([bounds[0][1]-bounds[0][0], bounds[1][1]-bounds[1][0]]) + np.array([bounds[0][0], bounds[1][0]])
# train_Y = utility_f(train_X)#f_lin(train_X)

# #pdb.set_trace()
# selected_idx, UCB, best_value = gp.select(train_X, num_alts)
# #selected_idx = gp.active_learner.select_previous(train_X, num_alts=num_alts)

# best_idx = np.argmax(train_Y[selected_idx])

# pairs = gr.ranked_pairs_from_fake(train_X[selected_idx], utility_f)

# print(pairs)
# print(train_Y[selected_idx])
# print(train_X[selected_idx])


# gp.add(train_X[selected_idx], pairs)

# gp.optimize(optimize_hyperparameter=False)

# x = np.linspace(bounds[0][0], bounds[0][1], num_side)
# y = np.linspace(bounds[1][0], bounds[1][1], num_side)

# X, Y = np.meshgrid(x,y)
# points = np.vstack([X.ravel(), Y.ravel()]).transpose()
# z = utility_f(points)
# z_norm = np.linalg.norm(z, ord=np.inf)
# z = z / z_norm
# Z = np.reshape(z, (num_side, num_side))

# z_predicted, z_sigma = gp.predict(points)
# ucb_pred = z_predicted + np.sqrt(z_sigma)*1
# Z_pred = np.reshape(z_predicted, (num_side, num_side))
# UCB_pred = np.reshape(ucb_pred, (num_side, num_side))


# assert UCB_pred[-1,-1] > UCB_pred[0,0]
# assert UCB_pred[-2,-2] > UCB_pred[0,0]
# assert UCB_pred[-3,-3] > UCB_pred[0,0]



# def test_user_gp_active_sq():
# user_gp_active_func(f_sq)

# def test_user_gp_active_lin():
# user_gp_active_func(f_lin)




# def test_abs_gp_user():
# X_train = np.array([0.4, 0.7, 0.9, 1.1, 1.2, 1.35, 1.4])
# abs_values = np.array([0.999, 0.6, 0.3, 0.2, 0.22, 0.4, 0.5])
# #abs_values = np.array([0.4, 0.2, 0.2, 0.2, 0.1, 0.11, 0.3])


# gp = gr.PreferenceGP(gr.RBF_kern(0.3, 0.25), normalize_gp=True, \
# normalize_positive=True, \
# pareto_pairs=True, \
# other_probits={'abs': gr.AbsBoundProbit(1.0,10.0)})

# X_train = X_train[:, np.newaxis]
# gp.add(X_train[0:3], abs_values[0:3], type='abs')
# gp.add(np.array([[0.6]]), [])

# gp.add(X_train[3:], abs_values[3:], type='abs')

# step = 0.02
# X = np.arange(0.0, 1.5, step)
# mu, sigma = gp.predict(X)
# std = np.sqrt(sigma)

# pre = 0.3
# assert mu[int(0.4/step)] < 1 + pre
# assert mu[int(0.4/step)] > 1 - pre
# assert mu[int(0.95/step)] < 0 + pre
# assert mu[int(0.95/step)] > 0 - pre
# assert mu[int(1.4/step)] < 0.5 + pre
# assert mu[int(1.4/step)] > 0.5 - pre



# def test_user_gp_ordinal():
# X_train = np.array([0,1,2,3,4.2,6,7])
# ratings = np.array([5,5,2,1,2 ,3,3])


# gp = gr.PreferenceGP(gr.RBF_kern(0.5, 0.7), \
# other_probits={'ordinal': gr.OrdinalProbit(2.0,1.0, n_ordinals=5)})
# #gp = gr.PreferenceGP(gr.periodic_kern(1.2,0.3,5))
# #gp = gr.PreferenceGP(gr.linear_kern(0.2, 0.2, 0.2))
# #gp = gr.PreferenceGP(gr.RBF_kern(0.2,1)+gr.periodic_kern(1,0.2,0)+gr.linear_kern(0.2,0.1,0.3))
# #gp = gr.PreferenceGP(gr.RBF_kern(0.1,1)*gr.linear_kern(0.3,0.2,0.3))

# gp.add(X_train, ratings, type='ordinal')

# #gp.optimize(optimize_hyperparameter=True)
# #print('gp.calc_ll()')
# #print(gp.calc_ll())

# step = 0.1
# X = np.arange(0, 8, step)
# mu, sigma = gp.predict(X)
# std = np.sqrt(sigma)

# assert mu[int(0.5/step)] > mu[int(3/step)]
# assert mu[int(6.5/step)] > mu[int(3/step)]
# assert mu[int(0.5/step)] > mu[int(6.5/step)]
