# config/dummy_config.yaml

# Settings for the learning algorithm named by `class_name`.
# NOTE(review): how `class_name` is resolved to an implementation is not
# visible in this file — confirm against the code that loads this config.
algorithm:
  class_name: PPO
  # training parameters
  # -- value function
  value_loss_coef: 1.0
  # NOTE(review): grouped under "value function" here; confirm whether the
  # consumer also uses `clip_param` for the surrogate (policy) loss.
  clip_param: 0.2
  use_clipped_value_loss: true
  # -- surrogate loss
  # presumably the KL target used when `schedule` is `adaptive` — TODO confirm
  desired_kl: 0.01
  entropy_coef: 0.01
  gamma: 0.99
  lam: 0.95
  max_grad_norm: 1.0
  # -- training
  learning_rate: 0.001
  num_learning_epochs: 5
  num_mini_batches: 4  # mini batch size = num_envs * num_steps / num_mini_batches
  schedule: adaptive  # one of: adaptive, fixed
# Settings for the policy network named by `class_name`.
policy:
  class_name: ActorCritic
  # MLP options, used by `ActorCritic`
  activation: elu
  # one entry per hidden layer; actor and critic are sized separately
  actor_hidden_dims: [128, 128, 128]
  critic_hidden_dims: [128, 128, 128]
  init_noise_std: 1.0
  # only needed for `ActorCriticRecurrent`; uncomment when switching
  # `class_name` to that policy
  # rnn_type: 'lstm'
  # rnn_hidden_size: 512
  # rnn_num_layers: 1
# Settings for the training runner: rollout length, iteration budget,
# logging, and checkpoint resume behaviour.
runner:
  # -- rollout and training length
  num_steps_per_env: 24  # number of steps per environment per iteration
  max_iterations: 1500  # number of policy updates
  empirical_normalization: false
  # -- logging parameters
  save_interval: 50  # check for potential saves every `save_interval` iterations
  experiment_name: walking_experiment
  run_name: ""
  # -- logging writer
  logger: tensorboard  # one of: tensorboard, neptune, wandb
  neptune_project: legged_gym
  wandb_project: legged_gym
  # -- load and resuming
  resume: false
  load_run: -1  # -1 means load latest run
  resume_path: null  # updated from load_run and checkpoint
  checkpoint: -1  # -1 means load latest checkpoint
# Name of the runner class to use.
# NOTE(review): presumably configured by the `runner` section — confirm.
runner_class_name: OnPolicyRunner
# presumably the random seed for the run — TODO confirm where it is applied
seed: 1