updates for quad2d

utiasDSL · Jun 26, 2024 · 788037e
1 parent 003d09b
commit 788037e
Show file tree

Hide file tree

Showing 21 changed files with 181 additions and 3,889 deletions.
diff --git a/examples/rl/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml b/examples/rl/config_overrides/quadrotor_2D/quadrotor_2D_track.yaml
@@ -1,16 +1,16 @@
 task_config:
   seed: 1337
   info_in_reset: True
-  ctrl_freq: 50
-  pyb_freq: 1000
+  ctrl_freq: 60
+  pyb_freq: 1200
   physics: pyb
   quad_type: 2
   normalized_rl_action_space: True
 
   init_state:
     init_x: 0
     init_x_dot: 0
-    init_z: 1
+    init_z: 1.15
     init_z_dot: 0
     init_theta: 0
     init_theta_dot: 0
@@ -20,74 +20,74 @@ task_config:
   init_state_randomization_info:
     init_x:
       distrib: 'uniform'
-      low: -2
-      high: 2
+      low: -0.01
+      high: 0.01
     init_x_dot:
       distrib: 'uniform'
-      low: -1
-      high: 1
+      low: -0.01
+      high: 0.01
     init_z:
       distrib: 'uniform'
-      low: 0.3  # Just so it doesn't crash into the ground
-      high: 2
+      low: -0.01  # NOTE(review): the old "doesn't crash into the ground" rationale looks stale for this bound; confirm intent
+      high: 0.01
     init_z_dot:
       distrib: 'uniform'
-      low: -1
-      high: 1
+      low: -0.01
+      high: 0.01
     init_theta:
       distrib: 'uniform'
-      low: -0.2
-      high: 0.2
+      low: -0.02
+      high: 0.02
     init_theta_dot:
       distrib: 'uniform'
-      low: -1.5
-      high: 1.5
+      low: -0.02
+      high: 0.02
 
   task: traj_tracking
   task_info:
     trajectory_type: figure8
     num_cycles: 1
     trajectory_plane: 'xz'
-    trajectory_position_offset: [0, 1]
-    trajectory_scale: 1
+    trajectory_position_offset: [0, 1.2]
+    trajectory_scale: 0.5
 
   inertial_prop:
     M: 0.027
     Iyy: 1.4e-05
 
-  episode_len_sec: 5
+  episode_len_sec: 10
   cost: rl_reward
   obs_goal_horizon: 1
 
   # RL Reward
-  rew_state_weight: [1, 0.01, 1, 0.01, 0.01, 0.01]
-  rew_act_weight: 0.01
+  rew_state_weight: [5, 0.1, 5, 0.1, 0.1, 0.1]
+  rew_act_weight: 0.1
   rew_exponential: True
 
-  constraints:
-    - constraint_form: default_constraint
-      constrained_variable: state
-      upper_bounds:
-        - 2
-        - 1
-        - 2
-        - 1
-        - 0.2
-        - 1.5
-      lower_bounds:
-        - -2
-        - -1
-        - 0
-        - -1
-        - -0.2
-        - -1.5
-    - constraint_form: default_constraint
-      constrained_variable: input
-      upper_bounds:
-        - 0.29
-        - 0.29
-      lower_bounds:
-        - 0.06
-        - 0.06
+#  constraints:
+#    - constraint_form: default_constraint
+#      constrained_variable: state
+#      upper_bounds:
+#        - 2
+#        - 1
+#        - 2
+#        - 1
+#        - 0.2
+#        - 2.5
+#      lower_bounds:
+#        - -2
+#        - -1
+#        - 0
+#        - -1
+#        - -0.2
+#        - -2.5
+#    - constraint_form: default_constraint
+#      constrained_variable: input
+#      upper_bounds:
+#        - 0.29
+#        - 0.29
+#      lower_bounds:
+#        - 0.06
+#        - 0.06
   done_on_out_of_bound: False
   done_on_violation: False
diff --git a/examples/rl/config_overrides/quadrotor_2D/sac_quadrotor_2D.yaml b/examples/rl/config_overrides/quadrotor_2D/sac_quadrotor_2D.yaml
@@ -3,7 +3,7 @@ algo_config:
   # model args
   hidden_dim: 128
   activation: "relu"
-  use_entropy_tuning: False
+  use_entropy_tuning: True
 
   # optim args
   train_interval: 100
@@ -17,9 +17,9 @@ algo_config:
   warm_up_steps: 1000
   rollout_batch_size: 4
   num_workers: 1
-  max_buffer_size: 1000000
-  deque_size: 10
-  eval_batch_size: 10
+  max_buffer_size: 50000
+  deque_size: 50
+  eval_batch_size: 50
 
   # misc
   log_interval: 4000

diff --git a/examples/rl/config_overrides/quadrotor_2D/td3_quadrotor_2D.yaml b/examples/rl/config_overrides/quadrotor_2D/td3_quadrotor_2D.yaml
@@ -1,9 +1,7 @@
-algo: sac
 algo_config:
   # model args
   hidden_dim: 128
   activation: "relu"
-  use_entropy_tuning: False
 
   # optim args
   train_interval: 100
@@ -16,9 +14,9 @@ algo_config:
   warm_up_steps: 1000
   rollout_batch_size: 4
   num_workers: 1
-  max_buffer_size: 1000000
-  deque_size: 10
-  eval_batch_size: 10
+  max_buffer_size: 50000
+  deque_size: 50
+  eval_batch_size: 50
 
   # misc
   log_interval: 4000

diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml
@@ -2,7 +2,7 @@ task_config:
   seed: 1337
   info_in_reset: True
   ctrl_freq: 60
-  pyb_freq: 1200
+  pyb_freq: 60
   physics: pyb
   quad_type: 4
   normalized_rl_action_space: False
@@ -28,8 +28,8 @@ task_config:
       high: 0.01
     init_z:
       distrib: 'uniform'
-      low: 1.15
-      high: 1.15
+      low: -0.01
+      high: 0.01
     init_z_dot:
       distrib: 'uniform'
       low: -0.01
@@ -60,15 +60,15 @@ task_config:
   obs_goal_horizon: 1
 
   # RL Reward
-  rew_state_weight: [1.0, 0.01, 1.0, 0.01, 0.01, 0.01]
+  rew_state_weight: [1.0, 0.1, 1.0, 0.1, 1.0, 1.0]
   rew_act_weight: 1.0
   rew_exponential: True
 
   constraints:
     - constraint_form: default_constraint
       constrained_variable: state
-      upper_bounds: [2, 1, 2, 1, 0.2, 1.5]
-      lower_bounds: [-2, -1, 0, -1, -0.2, -1.5]
+      upper_bounds: [2, 1, 2, 1, 0.2, 2.5]
+      lower_bounds: [-2, -1, 0, -1, -0.2, -2.5]
     - constraint_form: default_constraint
       constrained_variable: input
       upper_bounds: [0.58, 0.8]

diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/sac_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/sac_quadrotor_2D_attitude.yaml
@@ -0,0 +1,29 @@
+algo_config:
+  # model args
+  hidden_dim: 128
+  activation: "relu"
+  use_entropy_tuning: False
+
+  # optim args
+  train_interval: 100
+  train_batch_size: 256
+  actor_lr: 0.001
+  critic_lr: 0.001
+  entropy_lr: 0.001
+
+  # runner args
+  max_env_steps: 50000
+  warm_up_steps: 1000
+  rollout_batch_size: 4
+  num_workers: 1
+  max_buffer_size: 50000
+  deque_size: 10
+  eval_batch_size: 10
+
+  # misc
+  log_interval: 1000
+  save_interval: 0
+  num_checkpoints: 0
+  eval_interval: 1000
+  eval_save_best: True
+  tensorboard: False
diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/td3_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/td3_quadrotor_2D_attitude.yaml
@@ -0,0 +1,29 @@
+algo: sac
+algo_config:
+  # model args
+  hidden_dim: 128
+  activation: "relu"
+  use_entropy_tuning: False
+
+  # optim args
+  train_interval: 100
+  train_batch_size: 256
+  actor_lr: 0.001
+  critic_lr: 0.001
+
+  # runner args
+  max_env_steps: 100000
+  warm_up_steps: 1000
+  rollout_batch_size: 4
+  num_workers: 1
+  max_buffer_size: 100000
+  deque_size: 10
+  eval_batch_size: 10
+
+  # misc
+  log_interval: 2000
+  save_interval: 0
+  num_checkpoints: 0
+  eval_interval: 2000
+  eval_save_best: True
+  tensorboard: False
diff --git a/examples/rl/ppo_data/0/checkpoints/model_200000.pt b/examples/rl/ppo_data/0/checkpoints/model_200000.pt
diff --git a/examples/rl/ppo_data/0/model_best.pt b/examples/rl/ppo_data/0/model_best.pt
diff --git a/examples/rl/ppo_data/0/model_latest.pt b/examples/rl/ppo_data/0/model_latest.pt