Commit: Added linear preferences, optimization not complete
Showing 5 changed files with 319 additions and 0 deletions.

PreferenceLinear.py
@@ -0,0 +1,120 @@
# Copyright 2023 Ian Rankin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
# to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# PreferenceLinear.py
# Written Ian Rankin - November 2023
#
# A linear latent function to learn the given preferences.

import numpy as np
import sys
# tuple comparison handles future major versions correctly
if sys.version_info >= (3, 3):
    from collections.abc import Sequence
else:
    from collections import Sequence

from rdml_graph.gaussian_process import PreferenceProbit, ProbitBase
from rdml_graph.gaussian_process import k_fold_half, get_dk
from rdml_graph.gaussian_process import PreferenceModel

import pdb

class PreferenceLinear(PreferenceModel):
    ## init function
    # @param pareto_pairs - [opt] sets whether to assume pareto optimal user preferences.
    # @param other_probits - [opt] sets additional types of probits to add.
    def __init__(self, pareto_pairs=False, other_probits={}):
        super(PreferenceLinear, self).__init__(pareto_pairs, other_probits)

        self.probits = [PreferenceProbit(sigma=1.0)]

        self.lambda_newton = 0.3
        for key in other_probits:
            if not isinstance(other_probits[key], ProbitBase):
                raise TypeError("PreferenceLinear passed a probit that is not a probit: " + str(other_probits[key]))

            self.probits.append(other_probits[key])

    ## P_w
    # Log probability of w given the training data.
    def log_P_w(self, w):
        # project the training samples onto the latent space: F = X @ w
        # (mirrors the projection used in derivatives below)
        F = (self.X_train @ w[:, np.newaxis])[:, 0]

        log_p_w = 0.0
        for j, probit in enumerate(self.probits):
            if self.y_train[j] is not None:
                p_w_local = probit.log_likelihood(self.y_train[j], F)

                log_p_w += p_w_local

        return log_p_w

    ## derivatives
    # Calculates the derivatives for all of the given probits.
    # @param x - the input data samples
    # @param y - the given set of labels for the probits
    #            given as a list of [(dk, u, v), ...]
    # @param w - the linear weight vector
    #
    # @return - W, dpy_dw, py
    #       W - the second order derivative of the probits, mapped into w-space
    #       dpy_dw - the derivative of log P(y|x,theta) with respect to w
    #       py - log P(y|x,theta) summed over the given probits
    def derivatives(self, x, y, w):
        F = (x @ w[:, np.newaxis])[:, 0]

        W = np.zeros((len(F), len(F)))
        grad_ll = np.zeros(len(F))
        log_likelihood = 0
        for j, probit in enumerate(self.probits):
            if self.y_train[j] is not None:
                W_local, dpy_df_local, py_local = probit.derivatives(y[j], F)

                W += W_local
                grad_ll += dpy_df_local
                log_likelihood += py_local

        # chain rule: dl/dw = (dl/dF) @ dF/dw, with dF/dw = x for F = x @ w
        grad_ll = (grad_ll[np.newaxis, :] @ x)[0]
        W = x.T @ W @ x

        return W, grad_ll, log_likelihood

    ## optimize
    # Runs the optimization step required by the user preference GP.
    # @param optimize_hyperparameter - [opt] sets whether to optimize the hyperparameters
    def optimize(self, optimize_hyperparameter=False):
        if len(self.X_train.shape) > 1:
            self.w = np.random.random(self.X_train.shape[1])
        else:
            print('Only 1 reward parameter... a linear model makes little practical sense')
            self.w = np.random.random(1)

        # just do gradient descent.
        # NOTE: only the first derivative evaluation is here so far; the
        # descent loop itself is still to come (see commit message).
        W, dpy_dw, py = self.derivatives(self.X_train, self.y_train, self.w)

        pdb.set_trace()

        self.optimized = True

test_user_GP.py
@@ -0,0 +1,187 @@
# test_user_GP.py
# Written Ian Rankin - October 2022
#
# A set of tests for the user preferences.

import pytest

import numpy as np
import rdml_graph as gr
import tqdm


def f_sin(x, data=None):
    return 2 * np.cos(np.pi * (x - 2)) * np.exp(-(0.9 * x))

def f_lin(x, data=None):
    #return x[:,0]*x[:,1]
    return x[:, 0] + x[:, 1]

def f_sq(x, data=None):
    return x[:, 0] * x[:, 0] + 1.2 * x[:, 1]

def test_user_gp():
    X_train = np.array([[0, 0], [1, 2], [2, 4], [3, 2], [4.2, 5.6], [6, 2], [7, 8]])
    pairs = gr.generate_fake_pairs(X_train, f_lin, 0) + \
            gr.generate_fake_pairs(X_train, f_lin, 1) + \
            gr.generate_fake_pairs(X_train, f_lin, 2) + \
            gr.generate_fake_pairs(X_train, f_lin, 3) + \
            gr.generate_fake_pairs(X_train, f_lin, 4)

    gp = gr.PreferenceLinear()
    #gp = gr.PreferenceGP(gr.periodic_kern(1.2,0.3,5))
    #gp = gr.PreferenceGP(gr.linear_kern(0.2, 0.2, 0.2))
    #gp = gr.PreferenceGP(gr.RBF_kern(0.2,1)+gr.periodic_kern(1,0.2,0)+gr.linear_kern(0.2,0.1,0.3))
    #gp = gr.PreferenceGP(gr.RBF_kern(0.1,1)*gr.linear_kern(0.3,0.2,0.3))

    gp.add(X_train, pairs)

    gp.optimize(optimize_hyperparameter=False)
    #print('gp.calc_ll()')
    #print(gp.calc_ll())

    X = np.arange(-0.5, 8, 0.1)
    mu, sigma = gp.predict(X)
    std = np.sqrt(sigma)

    y, sigma = gp.predict(X_train)

    for i in range(len(X_train)):
        if i != 0:
            assert y[0] > y[i]
        if i != 1:
            assert y[1] < y[i]

# def user_gp_active_func(utility_f):
#     num_side = 25
#     bounds = [(0,7), (0,7)]

#     num_train_pts = 40
#     num_alts = 4

#     #gp = gr.PreferenceGP(gr.RBF_kern(0.2,0.5)*gr.linear_kern(0.2, 0.1, 0))
#     #gp = gr.PreferenceGP(gr.linear_kern(0.3, 0.1, 0.0))
#     gp = gr.PreferenceGP(gr.RBF_kern(1.0, 1.0), pareto_pairs=True, \
#                          use_hyper_optimization=False, \
#                          active_learner = gr.DetLearner(1.0))
#     gp.add_prior(bounds=np.array(bounds), num_pts=20)

#     for i in tqdm.tqdm(range(10)):
#         train_X = np.random.random((num_train_pts,2)) * np.array([bounds[0][1]-bounds[0][0], bounds[1][1]-bounds[1][0]]) + np.array([bounds[0][0], bounds[1][0]])
#         train_Y = utility_f(train_X)  #f_lin(train_X)

#         #pdb.set_trace()
#         selected_idx, UCB, best_value = gp.select(train_X, num_alts)
#         #selected_idx = gp.active_learner.select_previous(train_X, num_alts=num_alts)

#         best_idx = np.argmax(train_Y[selected_idx])

#         pairs = gr.ranked_pairs_from_fake(train_X[selected_idx], utility_f)

#         print(pairs)
#         print(train_Y[selected_idx])
#         print(train_X[selected_idx])

#         gp.add(train_X[selected_idx], pairs)

#         gp.optimize(optimize_hyperparameter=False)

#     x = np.linspace(bounds[0][0], bounds[0][1], num_side)
#     y = np.linspace(bounds[1][0], bounds[1][1], num_side)

#     X, Y = np.meshgrid(x, y)
#     points = np.vstack([X.ravel(), Y.ravel()]).transpose()
#     z = utility_f(points)
#     z_norm = np.linalg.norm(z, ord=np.inf)
#     z = z / z_norm
#     Z = np.reshape(z, (num_side, num_side))

#     z_predicted, z_sigma = gp.predict(points)
#     ucb_pred = z_predicted + np.sqrt(z_sigma)*1
#     Z_pred = np.reshape(z_predicted, (num_side, num_side))
#     UCB_pred = np.reshape(ucb_pred, (num_side, num_side))

#     assert UCB_pred[-1,-1] > UCB_pred[0,0]
#     assert UCB_pred[-2,-2] > UCB_pred[0,0]
#     assert UCB_pred[-3,-3] > UCB_pred[0,0]


# def test_user_gp_active_sq():
#     user_gp_active_func(f_sq)

# def test_user_gp_active_lin():
#     user_gp_active_func(f_lin)


# def test_abs_gp_user():
#     X_train = np.array([0.4, 0.7, 0.9, 1.1, 1.2, 1.35, 1.4])
#     abs_values = np.array([0.999, 0.6, 0.3, 0.2, 0.22, 0.4, 0.5])
#     #abs_values = np.array([0.4, 0.2, 0.2, 0.2, 0.1, 0.11, 0.3])

#     gp = gr.PreferenceGP(gr.RBF_kern(0.3, 0.25), normalize_gp=True, \
#                          normalize_positive=True, \
#                          pareto_pairs=True, \
#                          other_probits={'abs': gr.AbsBoundProbit(1.0,10.0)})

#     X_train = X_train[:, np.newaxis]
#     gp.add(X_train[0:3], abs_values[0:3], type='abs')
#     gp.add(np.array([[0.6]]), [])

#     gp.add(X_train[3:], abs_values[3:], type='abs')

#     step = 0.02
#     X = np.arange(0.0, 1.5, step)
#     mu, sigma = gp.predict(X)
#     std = np.sqrt(sigma)

#     pre = 0.3
#     assert mu[int(0.4/step)] < 1 + pre
#     assert mu[int(0.4/step)] > 1 - pre
#     assert mu[int(0.95/step)] < 0 + pre
#     assert mu[int(0.95/step)] > 0 - pre
#     assert mu[int(1.4/step)] < 0.5 + pre
#     assert mu[int(1.4/step)] > 0.5 - pre


# def test_user_gp_ordinal():
#     X_train = np.array([0, 1, 2, 3, 4.2, 6, 7])
#     ratings = np.array([5, 5, 2, 1, 2, 3, 3])

#     gp = gr.PreferenceGP(gr.RBF_kern(0.5, 0.7), \
#                          other_probits={'ordinal': gr.OrdinalProbit(2.0,1.0, n_ordinals=5)})
#     #gp = gr.PreferenceGP(gr.periodic_kern(1.2,0.3,5))
#     #gp = gr.PreferenceGP(gr.linear_kern(0.2, 0.2, 0.2))
#     #gp = gr.PreferenceGP(gr.RBF_kern(0.2,1)+gr.periodic_kern(1,0.2,0)+gr.linear_kern(0.2,0.1,0.3))
#     #gp = gr.PreferenceGP(gr.RBF_kern(0.1,1)*gr.linear_kern(0.3,0.2,0.3))

#     gp.add(X_train, ratings, type='ordinal')

#     #gp.optimize(optimize_hyperparameter=True)
#     #print('gp.calc_ll()')
#     #print(gp.calc_ll())

#     step = 0.1
#     X = np.arange(0, 8, step)
#     mu, sigma = gp.predict(X)
#     std = np.sqrt(sigma)

#     assert mu[int(0.5/step)] > mu[int(3/step)]
#     assert mu[int(6.5/step)] > mu[int(3/step)]
#     assert mu[int(0.5/step)] > mu[int(6.5/step)]
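
The tests above build preference labels with gr.generate_fake_pairs, which feeds the [(dk, u, v), ...] format named in the PreferenceLinear.derivatives docstring. As a rough illustration of that data format only, here is a hypothetical pair generator; the function name and the dk sign convention are assumptions, not rdml_graph's actual implementation.

import numpy as np

# Hypothetical sketch: compare point `idx` against every other point under a
# utility function and emit (dk, u, v) preference tuples. Illustration of the
# label format only; not rdml_graph.generate_fake_pairs.
def fake_pairs(X, utility_f, idx):
    y = utility_f(X)
    pairs = []
    for i in range(len(X)):
        if i == idx:
            continue
        dk = 1 if y[idx] > y[i] else -1   # assumed sign convention
        pairs.append((dk, idx, i))
    return pairs

X_train = np.array([[0, 0], [1, 2], [2, 4]])
print(fake_pairs(X_train, lambda x: x[:, 0] + x[:, 1], 0))
# prints [(-1, 0, 1), (-1, 0, 2)] under the assumed convention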