# config.yml

# CONFIGURATION FILE
# Contains the environment, the model settings, and the parameters used by
# each algorithm.

# Random seed. Written as an explicit null instead of a bare empty key; both
# parse to the same null value, but the explicit form cannot be mistaken for
# an accidental omission.
# NOTE(review): presumably a null seed leaves runs unseeded — confirm in the
# code that reads this key.
random_seed: null

##############################################################################
#  ENVIRONMENT        #  CartPole-v1          #  BipedalWalker-v2            #
#---------------------#-----------------------#------------------------------#
#  Gradient-based     #  DQN                  #  TD3 (Twin Delayed DDPG)     #
#  Gradient-free      #  Genetic Algorithm    #  Genetic Algorithm           #
##############################################################################

# Environment to run. The table above lists which algorithm is paired with
# each environment.
environment:
  name: 'CartPole-v1'   # 'CartPole-v1' or 'BipedalWalker-v2'
  # Canonical lowercase boolean; parses to the same value as `False`.
  # NOTE(review): presumably toggles on-screen rendering — confirm.
  animate: false

# Algorithm to run. Per the table above: 'dqn' is listed for CartPole-v1,
# 'td3' for BipedalWalker-v2, and 'ga' (Genetic Algorithm) for both.
algorithm: 'ga'        # 'dqn' or 'td3' or 'ga'


###############################################################################

# GRADIENT-BASED ALGORITHMS

# Settings stored under the `ddpg` key.
# NOTE(review): 'ddpg' is not among the values listed for `algorithm`
# ('dqn', 'td3', 'ga') — confirm whether this section is still read.
ddpg:
  n_episodes: 100000
  buffer_size: 1000000
  batch_size: 128
  # NOTE(review): presumably the discount factor — confirm.
  gamma: 0.99
  # NOTE(review): presumably the target-network soft-update rate — confirm.
  tau: 0.01
  actor_lr: 0.0001
  critic_lr: 0.001
  # NOTE(review): presumably an output sub-directory name — confirm.
  path: 'ddpg'

# Settings stored under the `td3` key; the table above expands TD3 as
# "Twin Delayed DDPG" and pairs it with BipedalWalker-v2.
td3:
  n_episodes: 2000
  batch_size: 32
  buffer_size: 1000000
  # NOTE(review): presumably the number of transitions collected before
  # training starts — confirm.
  buffer_size_warmup: 10000
  # NOTE(review): presumably the discount factor — confirm.
  gamma: 0.99
  # NOTE(review): presumably the target-network soft-update rate — confirm.
  tau: 0.005
  # NOTE(review): noise_clip (0.15) is smaller than sigma (0.5). If both apply
  # to the same noise term, most samples would be clipped — confirm that these
  # two values are intended and not swapped.
  sigma: 0.5
  noise_clip: 0.15
  train_interval: 2
  test_every: 20
  save_results_every: 10
  # NOTE(review): unit is unclear from the name (interval vs. count) — confirm.
  record_videos: 100
  actor_lr: 0.0001
  critic_lr: 0.0002
  # NOTE(review): presumably an output sub-directory name — confirm.
  path: 'td3'


# EVOLUTIONARY ALGORITHMS

# Population settings, listed under the evolutionary-algorithms heading.
# NOTE(review): presumably shared by all evolutionary algorithms below —
# confirm.
population:
  size: 50
  max_generations: 200
  # NOTE(review): presumably evaluation runs per agent per generation —
  # confirm.
  n_runs_per_agent: 1

# Settings stored under the `genetic_algorithm` key (presumably used when
# `algorithm` is 'ga' — confirm).
# NOTE(review): `best` and `elite` presumably count the selected parents and
# the individuals carried over unchanged; `noise_prob` presumably is a
# mutation probability — confirm against the implementation.
genetic_algorithm:
  best: 5
  elite: 2
  noise_prob: 0.7
  path: 'ga'

# Settings stored under the `evolution_strategies` key.
# NOTE(review): no matching value is listed for `algorithm`
# ('dqn', 'td3', 'ga') — confirm whether this section is still read.
evolution_strategies:
  # NOTE(review): presumably the perturbation scale — confirm.
  noise: 0.1
  path: 'es'

# Settings stored under the `cma_evolution_strategies` key.
# NOTE(review): no matching value is listed for `algorithm`
# ('dqn', 'td3', 'ga') — confirm whether this section is still read.
cma_evolution_strategies:
  # NOTE(review): presumably the fraction of the population selected each
  # generation — confirm.
  perc_selected: 0.25
  path: 'cma_es'

# Names used for stored results.
# NOTE(review): presumably directory names that the consumer joins into
# output paths — confirm how they are combined.
results:
  path: 'results'
  training: 'training'
  scores: 'scores'
  weights: 'weights'

# Names used for visualization output.
# NOTE(review): presumably directory names; `weights` repeats the value used
# under `results` — confirm how the two relate.
visualization:
  path: 'visualization'
  weights: 'weights'

# NOTE(review): presumably the file name of the YAML file where runs are
# logged — confirm.
logger: 'runs.yml'