diff --git a/experiments/.gitignore b/experiments/.gitignore
index 68bcbc9..9b5f182 100644
--- a/experiments/.gitignore
+++ b/experiments/.gitignore
@@ -1 +1,3 @@
-results/
\ No newline at end of file
+results/
+
+**.rew
\ No newline at end of file
diff --git a/experiments/GP_visualization.py b/experiments/GP_visualization.py
new file mode 100644
index 0000000..6fcbea5
--- /dev/null
+++ b/experiments/GP_visualization.py
@@ -0,0 +1,95 @@
+# GP_visualization.py
+# Written by Ian Rankin - December 2021
+#
+# A set of functions to visualize the learned GPs.
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+import lop
+
+## record_gp_state
+# Save the model's predictions over a test grid (along with the ground-truth
+# fake function) to an .npz file, and optionally render visualization plots.
+# @param bounds - per-dimension (min, max) tuples for the test grid
+def record_gp_state(model, fake_f, bounds=[(0,1.5),(0,1.5)], folder='./', \
+                    file_header='', visualize=False):
+    # Generate test grid over the given per-dimension bounds
+    num_side = 25
+    x = np.linspace(bounds[0][0], bounds[0][1], num_side)
+    y = np.linspace(bounds[1][0], bounds[1][1], num_side)
+
+    X, Y = np.meshgrid(x,y)
+    pts = np.vstack([X.ravel(), Y.ravel()]).transpose()
+
+    # evaluate the ground-truth utility and normalize it to [-1, 1]
+    fake_ut = fake_f(pts)
+    max_fake = np.linalg.norm(fake_ut, ord=np.inf)
+    fake_ut = fake_ut / max_fake
+
+    pred_ut, pred_sigma = model.predict_large(pts)
+
+    # Save useful visualization data
+    # (pickling the entire model is currently disabled)
+    viz_filename = folder+file_header+'_viz'
+    print(viz_filename)
+    np.savez(viz_filename, \
+            pts=pts, \
+            fake_ut=fake_ut, \
+            pred_ut=pred_ut, \
+            pred_sigma=pred_sigma, \
+            GP_pts=model.X_train, \
+            GP_pref=model.y_train, \
+            GP_prior_idx=model.prior_idx)
+
+
+    if visualize:
+        visualize_data(X, Y, num_side, fake_ut, pred_ut, pred_sigma,
+                        model.X_train, model.prior_idx, \
+                        folder, file_header, \
+                        also_display=False)
+
+
+## visualize_data
+# Render and save the active-sample and prior-point plots of the predicted utility.
+def visualize_data(X,Y, num_side, fake_ut, pred_ut, pred_sigma, \
+                    GP_pts, GP_prior_idx, \
+                    folder='./', file_header='', also_display=False):
+
+    Z_pred = np.reshape(pred_ut, (num_side, num_side))
+    Z_fake = np.reshape(fake_ut, (num_side, num_side))
+    Z_sigma = np.reshape(pred_sigma, (num_side, num_side))
+    Z_std = np.sqrt(Z_sigma)
+    Z_ucb = Z_pred + Z_std
+
+    plt.figure()
+
+    plt.pcolor(X, Y, Z_pred)
+    plt.contour(X, Y, Z_pred)
+    if GP_prior_idx is None:
+        GP_prior_idx = [0,0]
+    if GP_pts is not None:
+        # only plot the actively sampled points (those after the prior points)
+        plt.scatter(GP_pts[GP_prior_idx[1]:,0],GP_pts[GP_prior_idx[1]:,1])
+    else:
+        plt.scatter([],[])
+    plt.title(file_header+'_active samples')
+    plt.xlabel('Migratory fish reward')
+    plt.ylabel('Sea floor fish reward')
+
+    plt.savefig(folder+file_header+'_active_samples.jpg')
+
+    plt.figure()
+
+    plt.pcolor(X, Y, Z_pred)
+    plt.contour(X, Y, Z_pred)
+    if GP_pts is not None:
+        plt.scatter(GP_pts[:,0],GP_pts[:,1])
+    else:
+        plt.scatter([],[])
+    plt.title(file_header+'_with prior points')
+    plt.xlabel('Migratory fish reward')
+    plt.ylabel('Sea floor fish reward')
+    plt.savefig(folder+file_header+'_with_prior.jpg')
+
+    if also_display:
+        plt.show()
diff --git a/experiments/config.yaml b/experiments/config.yaml
new file mode 100644
index 0000000..c000354
--- /dev/null
+++ b/experiments/config.yaml
@@ -0,0 +1,32 @@
+######### for active learning selectors
+default_to_pareto: false
+always_select_best: true
+
+# alpha: weight on the UCB exploration term
+UCB_scalar: 1.0
+
+
+
+######## For model
+pareto_pairs: false
+
+rbf_sigma: 0.3
+rbf_lengthscale: 0.25
+
+hyperparameter_optimization: false
+
+# does this need to be updated depending on the fake function being used?
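+# (the two flags below are passed straight through to lop.PreferenceGP by
+#  get_model in experiment_helper.py; their exact semantics are assumed from
+#  the argument names)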
+normalize_gp: true
+normalize_positive: false
+
+
+add_model_prior: false
+prior_pts: 25
+prior_bounds:
+  -
+    - 0.0
+    - 1.5
+  -
+    - 0.0
+    - 1.5
+
diff --git a/experiments/experiment_helper.py b/experiments/experiment_helper.py
new file mode 100644
index 0000000..1140606
--- /dev/null
+++ b/experiments/experiment_helper.py
@@ -0,0 +1,281 @@
+# Copyright 2024 Ian Rankin
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
+# to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
+# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# experiment_helper.py
+# Written by Ian Rankin - January 2024
+#
+# A set of functions to set up and perform the active learning training and eval.
+
+import numpy as np
+import oyaml as yaml
+import pickle
+
+import lop
+
+from GP_visualization import record_gp_state, visualize_data
+
+## get_active_learner
+# Construct the requested active learning selector.
+# @param selector - one of 'UCB', 'SGV_UCB', 'MUTUAL_INFO', 'MUTUAL_INFO_PERF', 'RANDOM'
+# @param selection_type - 'choose1', 'ranking', or 'rating'
+# @param UCB_scalar - the UCB exploration weight (alpha)
+# @param config - the loaded experiment config
+# @param fake_func - the ground-truth function (used only by MUTUAL_INFO_PERF)
+def get_active_learner(selector, selection_type, UCB_scalar, config, fake_func=None):
+    default_to_pareto = config['default_to_pareto']
+    if selection_type == 'rating':
+        always_select_best = False
+    else:
+        always_select_best = config['always_select_best']
+
+    al = None
+    if selector == 'UCB':
+        al = lop.UCBLearner(UCB_scalar, default_to_pareto, always_select_best)
+    elif selector == 'SGV_UCB':
+        al = lop.GV_UCBLearner(UCB_scalar, default_to_pareto, always_select_best)
+    elif selector == 'MUTUAL_INFO':
+        al = lop.MutualInfoLearner(None, default_to_pareto, always_select_best)
+    elif selector == 'MUTUAL_INFO_PERF':
+        al = lop.MutualInfoLearner(fake_func, default_to_pareto, always_select_best)
+    elif selector == 'RANDOM':
+        al = lop.RandomLearner(default_to_pareto, always_select_best)
+
+    return al
+
+## get_model
+# Construct the requested preference model with the given active learner.
+def get_model(model_desc, active_learner, config):
+    pareto_pairs = config['pareto_pairs']
+
+    model = None
+    if model_desc == 'gp':
+        model = lop.PreferenceGP(
+                    cov_func=lop.RBF_kern(config['rbf_sigma'], config['rbf_lengthscale']),
+                    normalize_gp=config['normalize_gp'],
+                    pareto_pairs=pareto_pairs,
+                    normalize_positive=config['normalize_positive'],
+                    use_hyper_optimization=config['hyperparameter_optimization'],
+                    active_learner=active_learner
+        )
+    elif model_desc == 'linear':
+        model = lop.PreferenceLinear(pareto_pairs=pareto_pairs,\
+                    active_learner=active_learner)
+
+    return model
+
+
+## get_fake_func
+# Construct the requested ground-truth (fake) utility function.
+def get_fake_func(fake_func_desc, config):
+    func = None
+    if fake_func_desc == 'linear':
+        func = lop.FakeLinear()
+    elif fake_func_desc == 'squared':
+        func = lop.FakeSquared()
+    elif fake_func_desc == 'logistic':
+        func = lop.FakeLogistic()
+    elif fake_func_desc == 'sin_exp':
+        func = lop.FakeSinExp()
+    else:
+        raise ValueError('Unknown fake function: ' + str(fake_func_desc))
+
+    return func
+
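+# Example wiring of the helpers above (a sketch; the concrete argument values
+# are illustrative, not taken from any experiment config):
+#   al = get_active_learner('UCB', 'choose1', 1.0, config)
+#   model = get_model('gp', al, config)
+#   fake_f = get_fake_func('linear', config)
+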
+# (min, max) output ranges observed for the fake utility functions:
+# Linear   [0.17191523855986895, 2.41264845341967]
+# Multiply (0.0208195, 5.8210312)
+# F        [550.2325553878064, 1.3174585554818629e+32]
+rating_bounds_global = [0.17191523855986895, 2.41264845341967]
+
+## rating_score_from_fake_f
+# Convert ground-truth utilities into discrete rating scores in (0, 1].
+# @param rewards - the reward features of the selected paths
+# @param fake_func - the ground-truth utility function
+# @param rating_bounds - the (min, max) output range used to place decision points
+#
+# @return a list of rating scores, one per input
+def rating_score_from_fake_f(rewards, fake_func, rating_bounds):
+    decision_pts = np.arange(rating_bounds[0], rating_bounds[1], (rating_bounds[1] - rating_bounds[0]) / 5)
+    decision_pts[0] = 0
+    real_y = fake_func(rewards[np.newaxis,:])
+
+    ratings = []
+    for y in real_y:
+        # find the highest decision point the utility exceeds
+        idx = -1
+        for i, pt in enumerate(decision_pts):
+            if y > pt:
+                idx = i
+
+        ratings.append(idx/5.0 + 0.1)
+
+    return ratings
+
+
+## train_and_eval
+# Run a single active-learning experiment: load the config and path data,
+# query the synthetic user num_training times, and evaluate the model before
+# training and after each query.
+# @return (accuracy, avg_selection, all_ranks, estimated_scores, real_scores,
+#          score_diff) arrays indexed by training iteration
+def train_and_eval(config_filename,
+                    env_num,
+                    fake_func_desc,
+                    folder,
+                    selector='UCB',
+                    selection_type='choose1',
+                    model_desc='gp',
+                    num_alts=4,
+                    UCB_scaler=None,
+                    rbf_sigma=None,
+                    synth_user='perfect',
+                    verbose = False):
+    with open(config_filename, 'rb') as f:
+        config = yaml.load(f.read(), Loader=yaml.Loader)
+
+    with open('simple_rewards.rew', 'rb') as f:
+        path_data = pickle.load(f)
+
+    train_data = path_data['train']
+    eval_data = path_data['eval']
+
+    if rbf_sigma is not None:
+        config['rbf_sigma'] = rbf_sigma
+    if UCB_scaler is None:
+        UCB_scaler = config['UCB_scalar']
+
+    num_training = 10
+
+    #### Get required models and fake functions
+    # the fake function is constructed first so MUTUAL_INFO_PERF can be handed
+    # the ground truth it needs
+    utility_f = get_fake_func(fake_func_desc, config)
+    active_learner = get_active_learner(selector, selection_type, UCB_scaler, config,
+                                        fake_func=utility_f)
+    model = get_model(model_desc, active_learner, config)
+
+    if config['add_model_prior']:
+        model.add_prior(bounds=config['prior_bounds'], num_pts=config['prior_pts'])
+
+    # setup storage of scores
+    accuracy = np.zeros(num_training+1)
+    avg_selection = np.zeros(num_training+1)
+    all_ranks = np.zeros((num_training+1, len(eval_data[0])))
+
+    estimated_scores = np.zeros((num_training+1, len(eval_data[0])))
+    real_scores = np.zeros((num_training+1, len(eval_data[0])))
+    score_diff = np.zeros((num_training+1, len(eval_data[0])))
+
+    # test untrained model
+    accuracy[0], avg_selection[0], all_ranks[0], estimated_scores[0], real_scores[0], score_diff[0] = \
+        evaluation(env_num, utility_f, config, model, eval_data)
+
+    str_header = 'trainitr_' + str(0).zfill(3)
+
+    record_gp_state(model, utility_f, config['prior_bounds'], folder, \
+                    file_header=str_header, visualize=True)
+
+    bounds = [(0,0), (0,0)]
+    rand_order = np.random.choice(len(train_data[env_num]), size=num_training, replace=False)
+
+    ##### Start main training loop
+    for itr, i in enumerate(rand_order):
+        if verbose:
+            print('\t\tNUMBER of X_train: ' + str(len(model.X_train)))
+
+        ##### Extract rewards for environment domain
+        rewards, indicies = train_data[env_num][i]['rewards'], train_data[env_num][i]['indicies']
+
+        #### update the per-dimension reward bounds seen so far
+        mins = np.amin(rewards, axis=0)
+        maxs = np.amax(rewards, axis=0)
+        for j in range(len(bounds)):
+            if mins[j] < bounds[j][0]:
+                bounds[j] = (mins[j], bounds[j][1])
+            if maxs[j] > bounds[j][1]:
+                bounds[j] = (bounds[j][0], maxs[j])
+
+        if selection_type == 'choose1' or selection_type == 'ranking':
+            if config['pareto_pairs']:
+                sel_idx, non_shown_paths = model.select(rewards, num_alts,
+                                                prefer_pts=len(indicies['pareto']),
+                                                return_not_selected=True)
+            else:
+                sel_idx = model.select(rewards, num_alts)
+                non_shown_paths = []
+
+            y_fake = utility_f(rewards[sel_idx])
+
+            # get pairs
+            if selection_type == 'choose1':
+                if synth_user == 'perfect':
+                    best_idx = np.argmax(y_fake)
+                else:
+                    best_idx = lop.sample_human_choice(y_fake)
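+                    # the human_choice user samples its selection from lop's
+                    # stochastic human choice model rather than always picking
+                    # the argmax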
+                y_pairs = lop.gen_pairs_from_idx(best_idx, list(range(len(y_fake))))
+            else:
+                if synth_user != 'perfect':
+                    raise NotImplementedError('human choice user not implemented for ranking')
+                y_pairs = lop.generate_ranking_pairs(rewards[sel_idx], utility_f)
+
+            model.add(rewards[sel_idx], y_pairs)
+
+            # add non shown pareto paths if desired
+            if len(non_shown_paths) > 0:
+                model.add(rewards[non_shown_paths], [])
+
+        elif selection_type == 'rating':
+            sel_idx = model.select(rewards, num_alts)
+
+            rating = rating_score_from_fake_f(rewards[sel_idx], utility_f, rating_bounds_global)
+            rating_np = np.array(rating)
+
+            model.add(rewards[sel_idx], rating_np, type='abs')
+        # end else if for selection type
+
+        accuracy[itr+1], avg_selection[itr+1], all_ranks[itr+1], estimated_scores[itr+1], real_scores[itr+1], score_diff[itr+1] = \
+            evaluation(env_num, utility_f, config, model, eval_data)
+
+        str_header = 'trainitr_' + str(itr+1).zfill(3)
+
+        record_gp_state(model, utility_f, config['prior_bounds'], folder, \
+                        file_header=str_header, visualize=True)
+    # end for loop for training loop
+
+    return accuracy, avg_selection, all_ranks, estimated_scores, real_scores, score_diff
+
+
+## evaluation
+# Evaluate the current model on the withheld eval paths for one environment.
+# @return (fraction of evals where the model picked the true best path,
+#          mean rank of the selected path, per-eval ranks, estimated scores,
+#          real scores, utility gap between the true best and selected paths)
+def evaluation(env_num, utility_f, config, model, eval_data):
+    num_correct = 0
+    rank_sum = 0
+
+    num_eval = len(eval_data[env_num])
+    ranks = np.empty(num_eval)
+
+    estimated_scores = np.empty(num_eval)
+    real_scores = np.empty(num_eval)
+    score_diff = np.empty(num_eval)
+
+    for i in range(num_eval):
+        ##### Generate paths and select paths for explanation
+        rewards, indicies = eval_data[env_num][i]['rewards'], eval_data[env_num][i]['indicies']
+        scores = model(rewards)
+
+        best_idx = model.active_learner.select_best(scores, set(indicies['pareto']))
+        indicies['best'] = best_idx
+
+        fake_utility = utility_f(rewards)
+        sorted_idx = np.argsort(fake_utility)[::-1]
+        best_path = sorted_idx[0]
+        selected_rank = np.where(sorted_idx == indicies['best'])[0][0]
+
+        estimated_scores[i] = scores[indicies['best']]
+        real_scores[i] = fake_utility[indicies['best']]
+        score_diff[i] = fake_utility[best_path] - fake_utility[indicies['best']]
+
+        if selected_rank == 0:
+            num_correct += 1
+
+        rank_sum += selected_rank
+        ranks[i] = selected_rank
+
+    return num_correct / num_eval, rank_sum / num_eval, ranks, estimated_scores, real_scores, score_diff
\ No newline at end of file
diff --git a/experiments/single_test.py b/experiments/single_experiment.py
similarity index 84%
rename from experiments/single_test.py
rename to experiments/single_experiment.py
index c30d965..1cd357b 100644
--- a/experiments/single_test.py
+++ b/experiments/single_experiment.py
@@ -30,12 +30,15 @@
 
 import lop
 
+from experiment_helper import train_and_eval
+
 import sys
 import os
 import time
+
-test_short_plans=False
 plan_file_header='./'
 
 train_files = [['1.plan', '2.plan', '3.plan', '4.plan', '5.plan', '6.plan', '7.plan', '8.plan', '9.plan', '10.plan', '11.plan', '12.plan', '13.plan', '14.plan', '15.plan', '16.plan', '17.plan', '18.plan', '19.plan', '20.plan', ],
@@ -91,34 +94,47 @@ def str_timestamp():
     timestamp = time.strftime('%Y-%m-%d_%H-%M-%S', t)
     return timestamp
 
+possible_selectors = ['UCB', 'SGV_UCB', 'RANDOM', 'MUTUAL_INFO', 'MUTUAL_INFO_PERF']
+possible_selection_types = ['choose1', 'ranking', 'rating']
+possible_fake_funcs = ['linear', 'squared', 'logistic', 'sin_exp']
+possible_models = ['gp', 'linear']
 
 def main():
     # Read in arguments required
     parser = argparse.ArgumentParser(description='User preferences')
-    parser.add_argument('--config', type=str, default='../explainability/cfg/config.yaml')
+    parser.add_argument('--config', type=str, default='./config.yaml')
     parser.add_argument('--dir', type=str, default='')
     parser.add_argument('--env', type=int, default=2, help='environment number, using 2,4,7 [0,9] allowed')
-    parser.add_argument('--model', type=str, default='gp', help='Set the model to [gp, linear]')
-    parser.add_argument('--selector', type=str, default='SGV_UCB', help='Set the selectors to use options [UCB SGV_UCB random RATINGS RANKING_SGV_UCB RANKING_UCB RANKING_SGV_UCB MUTUAL_INFO RANKING_MUTUAL_INFO]')
+    parser.add_argument('--model', type=str, default='gp', help='Set the model to '+str(possible_models))
+    parser.add_argument('--selector', type=str, default='SGV_UCB', help='Set the selectors to use options '+str(possible_selectors))
+    parser.add_argument('--sel_type', type=str, default='choose1', help='Set the selection type to use options '+str(possible_selection_types))
     parser.add_argument('--num_runs', type=int, default=100, help='The number of runs')
+    parser.add_argument('--num_alts', type=int, default=4, help='The number of alternatives to show to the user')
     parser.add_argument('--user', type=str, default='human_choice', help='Set the synthetic user type to use options [ perfect human_choice ]')
-    parser.add_argument('--fake_func', type=str, default='linear', help='fake function for synthetic user: [linear, squared, logistic, sin_exp]')
+    parser.add_argument('--fake_func', type=str, default='linear', help='fake function for synthetic user: '+str(possible_fake_funcs))
     parser.add_argument('--test_experiment', type=bool, default=False, help='Shortens the number of plans to make testing the experiment easier')
     parser.add_argument('-v', type=bool, default=False, help='Verbose print statements')
 
     args = parser.parse_args()
 
+    global train_files
+    global eval_files
     if args.test_experiment:
         train_files = train_files_short
         eval_files = eval_files_short
+    train_files = [[plan_header+file for file in env] for env in train_files]
+    eval_files = [[plan_header+file for file in env] for env in eval_files]
 
-    if args.selector not in ['UCB', 'random', 'SGV_UCB', 'RATINGS', 'RANKING_UCB', 'RANKING_SGV_UCB', 'MUTUAL_INFO', 'RANKING_MUTUAL_INFO']:
-        print('Selector should be one of these [UCB SGV_UCB random RATINGS RANKING_SGV_UCB RANKING_UCB MUTUAL_INFO RANKING_MUTUAL_INFO] not ' + str(args.selector))
+    if args.selector not in possible_selectors:
+        print('Selector should be one of these '+str(possible_selectors)+' not ' + str(args.selector))
+        sys.exit(0)
+    if args.fake_func not in possible_fake_funcs:
+        print('fake_func should be one of these '+str(possible_fake_funcs)+' not ' + str(args.fake_func))
         sys.exit(0)
-    if args.fake_func not in ['linear', 'squared', 'logistic', 'sin_exp']:
-        print('fake_func should be one of these [linear, squared, logistic, sin_exp] not ' + str(args.fake_func))
+    if args.model not in possible_models:
+        print('model should be one of these '+str(possible_models)+' not ' + str(args.model))
         sys.exit(0)
-    if args.model not in ['gp', 'linear']:
-        print('model should be one of these [gp, linear] not ' + str(args.model))
+    if args.sel_type not in possible_selection_types:
+        print('selection type should be one of these '+str(possible_selection_types)+' not ' + str(args.sel_type))
         sys.exit(0)
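+
+    # Example invocation (illustrative; any combination of the validated
+    # options above is accepted):
+    #   python single_experiment.py --env 2 --model gp --selector SGV_UCB \
+    #       --sel_type choose1 --fake_func linear --user human_choice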
 
     # create a results folder named by the selector, user type, fake_func, and environment number.
@@ -130,17 +146,6 @@ def main():
 
     os.mkdir(folder_name)
 
-    # get fake functions
-    func = None
-    if args.fake_func == 'linear':
-        func = lop.FakeLinear()
-    elif args.fake_func == 'squared':
-        func = lop.FakeSquared()
-    elif args.fake_func == 'logistic':
-        func = lop.FakeLogistic()
-    elif args.fake_func == 'sin_exp':
-        func = lop.FakeSinExp()
-
     num_runs = args.num_runs
     num_eval = len(eval_files[0])
     num_train = len(train_files[0])
@@ -160,16 +165,16 @@ def main():
         cur_run = j
         run_folder = folder_name+'run_'+str(j)+'/'
         os.mkdir(run_folder)
-        func.randomize()
 
         print('VERBOSE: ' + str(args.v))
         accuracy, avg_selection, all_ranks, est_score, real_score, s_diff = \
             train_and_eval(  args.config, \
                             env_num=args.env, \
-                            utility_f=func, \
+                            fake_func_desc=args.fake_func, \
                             folder=run_folder, \
                             selector=args.selector, \
-                            rbf_sigma=rbf_sigma, \
-                            UCB_scaler=UCB_scaler, \
+                            selection_type=args.sel_type, \
+                            model_desc=args.model, \
+                            num_alts=args.num_alts, \
                             synth_user=args.user, \
                             verbose = args.v)
 
@@ -206,8 +211,5 @@ def main():
 
 if __name__ == '__main__':
     plan_header = plan_file_header+'saved_plans/'
-    train_files = [[plan_header+file for file in env] for env in train_files]
-    eval_files = [[plan_header+file for file in env] for env in eval_files]
-
     main()
diff --git a/src/lop/models/PreferenceGP.py b/src/lop/models/PreferenceGP.py
index 0fee9fc..c7e8b10 100644
--- a/src/lop/models/PreferenceGP.py
+++ b/src/lop/models/PreferenceGP.py
@@ -140,7 +140,7 @@ def predict(self, X):
     # @return an array of output values (n), other output data (variance, covariance,etc)
     def predict_large(self,X):
         # lazy optimization of GP
-        if not self.optimized:
+        if not self.optimized and self.X_train is not None:
             self.optimize(optimize_hyperparameter=self.use_hyper_optimization)
 
         num_at_a_time = 15
diff --git a/src/lop/utilities/__init__.py b/src/lop/utilities/__init__.py
index c4cb989..28da7d7 100644
--- a/src/lop/utilities/__init__.py
+++ b/src/lop/utilities/__init__.py
@@ -1,6 +1,6 @@
 # init the utilities subfolder
 
-from .preference_pairs import get_dk, gen_pairs_from_idx, ranked_pairs_from_fake, generate_fake_pairs
+from .preference_pairs import get_dk, gen_pairs_from_idx, ranked_pairs_from_fake, generate_fake_pairs, generate_ranking_pairs
 from .training_utility import k_fold_half
 from .human_choice_model import p_human_choice, sample_human_choice
 from .pareto import get_pareto
diff --git a/src/lop/utilities/preference_pairs.py b/src/lop/utilities/preference_pairs.py
index 85c34d4..062a03f 100644
--- a/src/lop/utilities/preference_pairs.py
+++ b/src/lop/utilities/preference_pairs.py
@@ -88,7 +88,20 @@ def generate_fake_pairs(X, real_f, pair_i, data=None):
     pairs = [(get_dk(Y[pair_i], y),pair_i, i) for i, y in enumerate(Y) if i != pair_i]
     return pairs
 
+## generate_ranking_pairs
+# Generate the full set of preference pairs implied by ranking the inputs X
+# under a ground-truth function.
+# @param X - the inputs to the function
+# @param real_f - the real function to estimate
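+#
+# @return a list of preference pair tuples (dk, better_idx, worse_idx), one for
+#    every ordered pair of inputs, e.g. 4 inputs yield 6 pairs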
+def generate_ranking_pairs(X, real_f):
+    real_y = real_f(X)
+
+    # sort indices from highest to lowest real utility
+    sorted_idx = np.argsort(real_y)
+    sorted_idx = sorted_idx[::-1]
+
+    # every index is preferred over every index ranked below it
+    pairs = []
+    for i, idx1 in enumerate(sorted_idx[:-1]):
+        for idx2 in sorted_idx[i+1:]:
+            pairs.append((get_dk(1,0), idx1, idx2))
+
+    return pairs
diff --git a/tests/models/test_preference_GP.py b/tests/models/test_preference_GP.py
index cb937bb..48c101c 100644
--- a/tests/models/test_preference_GP.py
+++ b/tests/models/test_preference_GP.py
@@ -22,6 +22,8 @@ def test_pref_GP_construction():
 
     assert gp is not None
 
+
+
 def test_pref_GP_training_does_not_crash():
     gp = lop.PreferenceGP(lop.RBF_kern(1.0, 1.0))
 
@@ -39,6 +41,27 @@ def test_pref_GP_training_does_not_crash():
     assert isinstance(y, np.ndarray)
     assert not np.isnan(y).any()
 
+def test_pref_gp_predict_without_training():
+    gp = lop.PreferenceGP(lop.RBF_kern(0.5, 0.7))
+
+    X = np.arange(-0.5, 8, 0.1)
+    mu, sigma = gp.predict(X)
+    std = np.sqrt(sigma)
+
+    assert not np.isnan(mu).any()
+    assert not np.isnan(sigma).any()
+    assert not np.isnan(std).any()
+
+def test_pref_gp_predict_large_without_training():
+    gp = lop.PreferenceGP(lop.RBF_kern(0.5, 0.7))
+
+    X = np.arange(-0.5, 8, 0.1)
+    mu, sigma = gp.predict_large(X)
+    std = np.sqrt(sigma)
+
+    assert not np.isnan(mu).any()
+    assert not np.isnan(sigma).any()
+    assert not np.isnan(std).any()
+
 def test_pref_GP_function():
     X_train = np.array([0,1,2,3,4.2,6,7])