Tried to fix p_synth_abs stuff
ianran committed Oct 19, 2024
1 parent b548899 commit e568bc1
Showing 5 changed files with 1,115 additions and 95 deletions.
40 changes: 26 additions & 14 deletions experiments/active_learning_p_synth.sh
@@ -40,10 +40,20 @@ for fake_func in min logistic linear
do
for p_synth_abs in 0.9 0.8 0.85 0.7
do
sel_type=rating
# sel_type=rating
alpha=0.5

for selc in UCB
# for selc in UCB
# do
# for i_env in 0 1 2 3 4 5 6 7 8 9
# do
# stdbuf -oL python3 single_experiment.py --env $i_env --model $model --selector $selc --sel_type $sel_type --num_runs $number_runs --num_alts $num_alts --user $user --hyper $hyper_sel --def_pareto $def_pareto --fake_func $fake_func --kmedoid $kmedoid --p_synth_pair $p_synth_pair --p_synth_abs $p_synth_abs --sigma_pair $sigma_pair --sigma_abs $sigma_abs --v_abs $v --alpha $alpha --rbf_sigma $rbf_sigma --rbf_l $rbf_l > results/console_output_${selc}_${model}_${i_env}_${hyper_sel}_${fake_func}.txt 2>&1 &
# done
# wait
# done

sel_type=switch
for selc in ACQ_SPEAR
do
for i_env in 0 1 2 3 4 5 6 7 8 9
do
@@ -52,18 +62,20 @@ do
wait
done

for alpha in 0.1 0.9
do
sel_type=switch

for selc in SW_UCB_SPEAR
do
for i_env in 0 1 2 3 4 5 6 7 8 9
do
stdbuf -oL python3 single_experiment.py --env $i_env --model $model --selector $selc --sel_type $sel_type --num_runs $number_runs --num_alts $num_alts --user $user --hyper $hyper_sel --def_pareto $def_pareto --fake_func $fake_func --kmedoid $kmedoid --p_synth_pair $p_synth_pair --p_synth_abs $p_synth_abs --sigma_pair $sigma_pair --sigma_abs $sigma_abs --v_abs $v --alpha $alpha --rbf_sigma $rbf_sigma --rbf_l $rbf_l > results/console_output_${selc}_${model}_${i_env}_${hyper_sel}_${fake_func}.txt 2>&1 &
done
wait
done
done

# for alpha in 0.1 0.9
# do
# sel_type=switch

# for selc in SW_UCB_SPEAR
# do
# for i_env in 0 1 2 3 4 5 6 7 8 9
# do
# stdbuf -oL python3 single_experiment.py --env $i_env --model $model --selector $selc --sel_type $sel_type --num_runs $number_runs --num_alts $num_alts --user $user --hyper $hyper_sel --def_pareto $def_pareto --fake_func $fake_func --kmedoid $kmedoid --p_synth_pair $p_synth_pair --p_synth_abs $p_synth_abs --sigma_pair $sigma_pair --sigma_abs $sigma_abs --v_abs $v --alpha $alpha --rbf_sigma $rbf_sigma --rbf_l $rbf_l > results/console_output_${selc}_${model}_${i_env}_${hyper_sel}_${fake_func}.txt 2>&1 &
# done
# wait
# done
# done
done
done
1,154 changes: 1,078 additions & 76 deletions experiments/analysis-2024-10-14.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion experiments/experiment_helper.py
@@ -503,6 +503,7 @@ def train_and_eval(config_filename,
sel_idx = model.select(rewards, num_alts)
x_train = rewards[sel_idx]


# check if a rating or choose is selected by the active learning
if len(sel_idx) == 1:
rating = user_f.rate(x_train)
@@ -553,7 +554,7 @@ def evaluation(env_num, utility_f, config, model, eval_data):
for i in range(num_eval):
##### Generate paths and select paths for explanation
rewards, indicies = eval_data[env_num][i]['rewards'], eval_data[env_num][i]['indicies']
#pdb.set_trace()

rewards = rewards[indicies['pareto']]
scores = model(rewards)

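For context on the train_and_eval hunk above: it sits next to the branch that decides whether an active-learning query is handled as a rating or as a choice among alternatives. The following minimal Python sketch is based only on the lines visible in this diff; the query_user wrapper and the user_f.choose call are hypothetical stand-ins, not the repository's API.

    def query_user(model, user_f, rewards, num_alts):
        # Sketch of the visible logic in train_and_eval: the selector returns
        # one index for a rating query, several for a comparison query.
        sel_idx = model.select(rewards, num_alts)
        x_train = rewards[sel_idx]

        if len(sel_idx) == 1:
            # single candidate -> ask the synthetic user for an absolute rating
            y_train = user_f.rate(x_train)
        else:
            # several candidates -> ask for a choice among them (hypothetical
            # call; the corresponding lines are not shown in this diff)
            y_train = user_f.choose(x_train)
        return x_train, y_train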
2 changes: 2 additions & 0 deletions src/lop/utilities/human_choice_model.py
@@ -64,6 +64,8 @@ def sample_human_choice(r, p=1.0, samples=None):
xk = np.arange(len(r))
pdf = p_human_choice(r, p=p)

print('pdf = ' + str(pdf))

if samples is None:
return np.random.choice(xk, p=pdf)
else:
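The only change to human_choice_model.py is a debug print of the choice distribution before sampling. As a reading aid, here is a minimal sketch of how the visible part of sample_human_choice uses that distribution; pdf stands in for the output of p_human_choice, and the multi-sample branch is an assumption since it is cut off in the diff.

    import numpy as np

    def sample_choice(pdf, samples=None):
        # pdf is the normalized choice distribution printed by the new debug
        # line (produced by p_human_choice in the real code).
        xk = np.arange(len(pdf))
        if samples is None:
            return np.random.choice(xk, p=pdf)            # one sampled index
        return np.random.choice(xk, p=pdf, size=samples)  # assumed multi-sample form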
11 changes: 7 additions & 4 deletions src/lop/utilities/synthetic_user.py
@@ -118,8 +118,7 @@ def kl_objective(self, b, desired_p, sample_queries):
return np.mean(KL_1+KL_2)


def sampled_objective(self, b, desired_p, sample_queries):
y = (self.fake_f(sample_queries) * self.k) + self.b
def sampled_objective(self, b, desired_p, sample_queries, y):
p = p_human_choice(y, p=b)

p_max = np.max(p, axis=1)
@@ -141,7 +140,7 @@ def rate_sampled_objective(self, sigma, desired_p, sample_queries, y):
return (p_samp - desired_p)**2

def sample_Qs(self, rewards, Q_size):
num_Q = min(comb(rewards.shape[0], Q_size) * 0.5, 20000)
num_Q = min(comb(rewards.shape[0], Q_size) * 0.5, 60000)
if num_Q < 30:
num_Q = min(30, comb(rewards.shape[0], Q_size))
num_Q = int(num_Q)
@@ -177,8 +176,10 @@ def learn_beta(self, rewards, p, Q_size=2, p_sigma=None):
if p_sigma is None:
p_sigma = p


self.learn_beta_pairwise(rewards, p, Q_size, Qs=Qs)
self.learn_sigma(rewards, p_sigma, Q_size, Qs=Qs)
print('beta = ' + str(self.beta) + ' sigma=' + str(self.sigma))


## learn_sigma
@@ -214,8 +215,10 @@ def learn_beta_pairwise(self, rewards, p, Q_size=2, Qs=None):

sample_Q = rewards[Qs]

y = (self.fake_f(sample_Q) * self.k) + self.b

for i in range(10):
res = minimize_scalar(self.sampled_objective, bounds=[0.01, 150.0], args=(p, sample_Q), options={'xatol': 0.01})
res = minimize_scalar(self.sampled_objective, bounds=[0.01, 150.0], args=(p, sample_Q, y), options={'xatol': 0.01})

if res.fun < 0.02:
break
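The substantive change in synthetic_user.py is that learn_beta_pairwise now computes y = (self.fake_f(sample_Q) * self.k) + self.b once and passes it to sampled_objective, instead of recomputing it inside every objective call made by minimize_scalar. The following is a minimal self-contained sketch of that pattern, not the project's code: the softmax-style p_human_choice and the squared-error return value are assumptions, since those parts are truncated in the diff.

    import numpy as np
    from scipy.optimize import minimize_scalar

    def p_human_choice(y, p):
        # stand-in softmax-style choice model (assumption, not the project's code)
        e = np.exp(p * (y - y.max(axis=1, keepdims=True)))
        return e / e.sum(axis=1, keepdims=True)

    def sampled_objective(beta, desired_p, y):
        # y is precomputed outside and passed through args, so fake_f is not
        # re-evaluated on every call made by the optimizer
        p = p_human_choice(y, p=beta)
        p_max = np.max(p, axis=1)
        return np.mean((p_max - desired_p) ** 2)   # assumed form of the loss

    def learn_beta(fake_f, k, b, sample_Q, desired_p):
        y = (fake_f(sample_Q) * k) + b             # computed once, as in the new code
        res = minimize_scalar(sampled_objective, bounds=[0.01, 150.0],
                              args=(desired_p, y), method='bounded',
                              options={'xatol': 0.01})
        return res.x

Hoisting y out of the objective matters because minimize_scalar may evaluate the objective many times per restart, and fake_f over all sampled queries is the expensive part.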
