Tried to fix p_synth_abs stuff
ianran committed Oct 19, 2024
1 parent b548899 commit e568bc1
Showing 5 changed files with 1,115 additions and 95 deletions.
40 changes: 26 additions & 14 deletions experiments/active_learning_p_synth.sh
@@ -40,10 +40,20 @@ for fake_func in min logistic linear
do
for p_synth_abs in 0.9 0.8 0.85 0.7
do
sel_type=rating
# sel_type=rating
alpha=0.5

for selc in UCB
# for selc in UCB
# do
# for i_env in 0 1 2 3 4 5 6 7 8 9
# do
# stdbuf -oL python3 single_experiment.py --env $i_env --model $model --selector $selc --sel_type $sel_type --num_runs $number_runs --num_alts $num_alts --user $user --hyper $hyper_sel --def_pareto $def_pareto --fake_func $fake_func --kmedoid $kmedoid --p_synth_pair $p_synth_pair --p_synth_abs $p_synth_abs --sigma_pair $sigma_pair --sigma_abs $sigma_abs --v_abs $v --alpha $alpha --rbf_sigma $rbf_sigma --rbf_l $rbf_l > results/console_output_${selc}_${model}_${i_env}_${hyper_sel}_${fake_func}.txt 2>&1 &
# done
# wait
# done

sel_type=switch
for selc in ACQ_SPEAR
do
for i_env in 0 1 2 3 4 5 6 7 8 9
do
@@ -52,18 +62,20 @@ do
wait
done

for alpha in 0.1 0.9
do
sel_type=switch

for selc in SW_UCB_SPEAR
do
for i_env in 0 1 2 3 4 5 6 7 8 9
do
stdbuf -oL python3 single_experiment.py --env $i_env --model $model --selector $selc --sel_type $sel_type --num_runs $number_runs --num_alts $num_alts --user $user --hyper $hyper_sel --def_pareto $def_pareto --fake_func $fake_func --kmedoid $kmedoid --p_synth_pair $p_synth_pair --p_synth_abs $p_synth_abs --sigma_pair $sigma_pair --sigma_abs $sigma_abs --v_abs $v --alpha $alpha --rbf_sigma $rbf_sigma --rbf_l $rbf_l > results/console_output_${selc}_${model}_${i_env}_${hyper_sel}_${fake_func}.txt 2>&1 &
done
wait
done
done

# for alpha in 0.1 0.9
# do
# sel_type=switch

# for selc in SW_UCB_SPEAR
# do
# for i_env in 0 1 2 3 4 5 6 7 8 9
# do
# stdbuf -oL python3 single_experiment.py --env $i_env --model $model --selector $selc --sel_type $sel_type --num_runs $number_runs --num_alts $num_alts --user $user --hyper $hyper_sel --def_pareto $def_pareto --fake_func $fake_func --kmedoid $kmedoid --p_synth_pair $p_synth_pair --p_synth_abs $p_synth_abs --sigma_pair $sigma_pair --sigma_abs $sigma_abs --v_abs $v --alpha $alpha --rbf_sigma $rbf_sigma --rbf_l $rbf_l > results/console_output_${selc}_${model}_${i_env}_${hyper_sel}_${fake_func}.txt 2>&1 &
# done
# wait
# done
# done
done
done
1,154 changes: 1,078 additions & 76 deletions experiments/analysis-2024-10-14.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion experiments/experiment_helper.py
@@ -503,6 +503,7 @@ def train_and_eval(config_filename,
sel_idx = model.select(rewards, num_alts)
x_train = rewards[sel_idx]


# check if a rating or choose is selected by the active learning
if len(sel_idx) == 1:
rating = user_f.rate(x_train)
@@ -553,7 +554,7 @@ def evaluation(env_num, utility_f, config, model, eval_data):
for i in range(num_eval):
##### Generate paths and select paths for explanation
rewards, indicies = eval_data[env_num][i]['rewards'], eval_data[env_num][i]['indicies']
#pdb.set_trace()

rewards = rewards[indicies['pareto']]
scores = model(rewards)

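For context on the train_and_eval hunk above: it sits next to the branch that decides whether an active-learning query is handled as a rating or as a choice among alternatives. The following minimal Python sketch is based only on the lines visible in this diff; the query_user wrapper and the user_f.choose call are hypothetical stand-ins, not the repository's API.

    def query_user(model, user_f, rewards, num_alts):
        # Sketch of the visible logic in train_and_eval: the selector returns
        # one index for a rating query, several for a comparison query.
        sel_idx = model.select(rewards, num_alts)
        x_train = rewards[sel_idx]

        if len(sel_idx) == 1:
            # single candidate -> ask the synthetic user for an absolute rating
            y_train = user_f.rate(x_train)
        else:
            # several candidates -> ask for a choice among them (hypothetical
            # call; the corresponding lines are not shown in this diff)
            y_train = user_f.choose(x_train)
        return x_train, y_train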
2 changes: 2 additions & 0 deletions src/lop/utilities/human_choice_model.py
@@ -64,6 +64,8 @@ def sample_human_choice(r, p=1.0, samples=None):
xk = np.arange(len(r))
pdf = p_human_choice(r, p=p)

print('pdf = ' + str(pdf))

if samples is None:
return np.random.choice(xk, p=pdf)
else:
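The only change to human_choice_model.py is a debug print of the choice distribution before sampling. As a reading aid, here is a minimal sketch of how the visible part of sample_human_choice uses that distribution; pdf stands in for the output of p_human_choice, and the multi-sample branch is an assumption since it is cut off in the diff.

    import numpy as np

    def sample_choice(pdf, samples=None):
        # pdf is the normalized choice distribution printed by the new debug
        # line (produced by p_human_choice in the real code).
        xk = np.arange(len(pdf))
        if samples is None:
            return np.random.choice(xk, p=pdf)            # one sampled index
        return np.random.choice(xk, p=pdf, size=samples)  # assumed multi-sample form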
11 changes: 7 additions & 4 deletions src/lop/utilities/synthetic_user.py
@@ -118,8 +118,7 @@ def kl_objective(self, b, desired_p, sample_queries):
return np.mean(KL_1+KL_2)


def sampled_objective(self, b, desired_p, sample_queries):
y = (self.fake_f(sample_queries) * self.k) + self.b
def sampled_objective(self, b, desired_p, sample_queries, y):
p = p_human_choice(y, p=b)

p_max = np.max(p, axis=1)
@@ -141,7 +140,7 @@ def rate_sampled_objective(self, sigma, desired_p, sample_queries, y):
return (p_samp - desired_p)**2

def sample_Qs(self, rewards, Q_size):
num_Q = min(comb(rewards.shape[0], Q_size) * 0.5, 20000)
num_Q = min(comb(rewards.shape[0], Q_size) * 0.5, 60000)
if num_Q < 30:
num_Q = min(30, comb(rewards.shape[0], Q_size))
num_Q = int(num_Q)
@@ -177,8 +176,10 @@ def learn_beta(self, rewards, p, Q_size=2, p_sigma=None):
if p_sigma is None:
p_sigma = p


self.learn_beta_pairwise(rewards, p, Q_size, Qs=Qs)
self.learn_sigma(rewards, p_sigma, Q_size, Qs=Qs)
print('beta = ' + str(self.beta) + ' sigma=' + str(self.sigma))


## learn_sigma
@@ -214,8 +215,10 @@ def learn_beta_pairwise(self, rewards, p, Q_size=2, Qs=None):

sample_Q = rewards[Qs]

y = (self.fake_f(sample_Q) * self.k) + self.b

for i in range(10):
res = minimize_scalar(self.sampled_objective, bounds=[0.01, 150.0], args=(p, sample_Q), options={'xatol': 0.01})
res = minimize_scalar(self.sampled_objective, bounds=[0.01, 150.0], args=(p, sample_Q, y), options={'xatol': 0.01})

if res.fun < 0.02:
break
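The substantive change in synthetic_user.py is that learn_beta_pairwise now computes y = (self.fake_f(sample_Q) * self.k) + self.b once and passes it to sampled_objective, instead of recomputing it inside every objective call made by minimize_scalar. The following is a minimal self-contained sketch of that pattern, not the project's code: the softmax-style p_human_choice and the squared-error return value are assumptions, since those parts are truncated in the diff.

    import numpy as np
    from scipy.optimize import minimize_scalar

    def p_human_choice(y, p):
        # stand-in softmax-style choice model (assumption, not the project's code)
        e = np.exp(p * (y - y.max(axis=1, keepdims=True)))
        return e / e.sum(axis=1, keepdims=True)

    def sampled_objective(beta, desired_p, y):
        # y is precomputed outside and passed through args, so fake_f is not
        # re-evaluated on every call made by the optimizer
        p = p_human_choice(y, p=beta)
        p_max = np.max(p, axis=1)
        return np.mean((p_max - desired_p) ** 2)   # assumed form of the loss

    def learn_beta(fake_f, k, b, sample_Q, desired_p):
        y = (fake_f(sample_Q) * k) + b             # computed once, as in the new code
        res = minimize_scalar(sampled_objective, bounds=[0.01, 150.0],
                              args=(desired_p, y), method='bounded',
                              options={'xatol': 0.01})
        return res.x

Hoisting y out of the objective matters because minimize_scalar may evaluate the objective many times per restart, and fake_f over all sampled queries is the expensive part.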
