Skip to content

Commit

Permalink
Merge pull request #352 from kengz/v4-dev
Browse files Browse the repository at this point in the history
v4.0.0 prerelease merge: Algorithm Benchmark, Analysis, API simplification
  • Loading branch information
kengz authored May 27, 2019
2 parents 1972461 + e34cb30 commit ac2e5b3
Show file tree
Hide file tree
Showing 166 changed files with 10,301 additions and 10,025 deletions.
6 changes: 3 additions & 3 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ about: Create a report to help us improve
A clear and concise description of what the bug is.

**To Reproduce**
1. OS used:
2. SLM-Lab git SHA (run `git rev-parse HEAD`):
3. `spec` and `config/experiments.json` used:
1. OS and environment:
2. SLM Lab git SHA (run `git rev-parse HEAD` to get it):
3. `spec` file used:

**Additional context**
Add any other context about the problem here.
Expand Down
329 changes: 109 additions & 220 deletions README.md

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions bin/setup_arch_extra
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@ echo "--- Installing Unity ML agents ---"
conda activate lab
pip install unityagents==0.2.0
pip uninstall -y tensorflow tensorboard

echo "--- Installing VizDoom ---"
pip install vizdoom==1.1.6
3 changes: 3 additions & 0 deletions bin/setup_macOS_extra
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@ echo "--- Installing Unity ML agents ---"
conda activate lab
pip install unityagents==0.2.0
pip uninstall -y tensorflow tensorboard

echo "--- Installing VizDoom ---"
pip install vizdoom==1.1.6
3 changes: 3 additions & 0 deletions bin/setup_ubuntu_extra
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ echo "--- Installing Unity ML agents ---"
conda activate lab
pip install unityagents==0.2.0
pip uninstall -y tensorflow tensorboard

echo "--- Installing VizDoom ---"
pip install vizdoom==1.1.6
19 changes: 9 additions & 10 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ dependencies:
- pandas=0.22.0=py36_0
- pillow=5.0.0=py36_0
- pip=9.0.1=py36_1
- plotly=3.4.2
- plotly=3.8.1
- plotly-orca=1.2.1
- psutil=5.4.7
- psutil=5.6.2
- pycodestyle=2.3.1=py36_0
- pydash=4.2.1=py_0
- pytest-cov=2.5.1=py36_0
Expand All @@ -42,18 +42,17 @@ dependencies:
- xlrd=1.1.0=py_2
- pytorch=1.0.1
- pip:
- atari-py==0.1.1
- box2d-py==2.3.8
- cloudpickle==0.5.2
- colorlover==0.3.0
- deap==1.2.2
- gym==0.10.9
- gym[atari]
- gym[box2d]
- gym[classic_control]
- opencv-python==3.4.0.12
- pyopengl==3.1.0
- ray==0.5.3
- ray==0.7.0
- redis==2.10.6
- xvfbwrapper==0.2.9
- vizdoom==1.1.6
- gym==0.12.1
- gym[atari]
- gym[box2d]
- gym[classic_control]
- roboschool==1.0.46
- atari-py
11 changes: 11 additions & 0 deletions job/a2c_gae_benchmark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"experimental/a2c/a2c_gae_atari.json": {
"a2c_gae_atari": "train"
},
"experimental/a2c/a2c_gae_cont.json": {
"a2c_gae_cont": "train"
},
"experimental/a2c/a2c_gae_cont_hard.json": {
"a2c_gae_cont_hard": "train"
}
}
8 changes: 8 additions & 0 deletions job/a2c_nstep_benchmark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"experimental/a2c/a2c_atari.json": {
"a2c_atari": "train"
},
"experimental/a2c/a2c_cont.json": {
"a2c_cont": "train"
}
}
5 changes: 5 additions & 0 deletions job/a3c_gae_benchmark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"experimental/a3c/a3c_gae_atari.json": {
"a3c_gae_atari": "train"
}
}
14 changes: 14 additions & 0 deletions job/dqn_benchmark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"experimental/dqn/dqn_atari.json": {
"dqn_atari": "train"
},
"experimental/dqn/dqn_per_atari.json": {
"dqn_per_atari": "train"
},
"experimental/dqn/ddqn_atari.json": {
"ddqn_atari": "train"
},
"experimental/dqn/ddqn_per_atari.json": {
"ddqn_per_atari": "train"
}
}
File renamed without changes.
11 changes: 11 additions & 0 deletions job/ppo_benchmark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"experimental/ppo/ppo_atari.json": {
"ppo_atari": "train"
},
"experimental/ppo/ppo_cont.json": {
"ppo_cont": "train"
},
"experimental/ppo/ppo_cont_hard.json": {
"ppo_cont_hard": "train"
}
}
4 changes: 1 addition & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
{
"name": "slm_lab",
"version": "2.1.2",
"version": "4.0.0",
"description": "Modular Deep Reinforcement Learning framework in PyTorch.",
"main": "index.js",
"scripts": {
"start": "python run_lab.py",
"debug": "LOG_LEVEL=DEBUG python run_lab.py",
"debug2": "LOG_LEVEL=DEBUG2 python run_lab.py",
"debug3": "LOG_LEVEL=DEBUG3 python run_lab.py",
"retro_analyze": "python -c 'import sys; from slm_lab.experiment import retro_analysis; retro_analysis.retro_analyze(sys.argv[1])'",
"retro_eval": "python -c 'import sys; from slm_lab.experiment import retro_analysis; retro_analysis.retro_eval(sys.argv[1])'",
"reset": "rm -rf data/* .cache __pycache__ */__pycache__ *egg-info .pytest* htmlcov .coverage* *.xml",
Expand Down
125 changes: 56 additions & 69 deletions run_lab.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
'''
The entry point of SLM Lab
Specify what to run in `config/experiments.json`
Then run `python run_lab.py` or `yarn start`
'''
import os
# NOTE increase if needed. Pytorch thread overusage https://github.com/pytorch/pytorch/issues/975
os.environ['OMP_NUM_THREADS'] = '1'
# The SLM Lab entrypoint
# to run scheduled set of specs:
# python run_lab.py job/experiments.json
# to run a single spec:
# python run_lab.py slm_lab/spec/experimental/a2c_pong.json a2c_pong train
from slm_lab import EVAL_MODES, TRAIN_MODES
from slm_lab.experiment import analysis, retro_analysis
from slm_lab.experiment.control import Session, Trial, Experiment
from slm_lab.experiment.monitor import InfoSpace
from slm_lab.lib import logger, util
from slm_lab.spec import spec_util
from xvfbwrapper import Xvfb
import os
import pydash as ps
import sys
import torch
import torch.multiprocessing as mp


Expand All @@ -22,81 +20,70 @@
]
debug_level = 'DEBUG'
logger.toggle_debug(debug_modules, debug_level)
logger = logger.get_logger(__name__)


def run_new_mode(spec_file, spec_name, lab_mode):
'''Run to generate new data with `search, train, dev`'''
spec = spec_util.get(spec_file, spec_name)
info_space = InfoSpace()
analysis.save_spec(spec, info_space, unit='experiment') # first save the new spec
if lab_mode == 'search':
info_space.tick('experiment')
Experiment(spec, info_space).run()
elif lab_mode.startswith('train'):
info_space.tick('trial')
Trial(spec, info_space).run()
elif lab_mode == 'dev':
spec = spec_util.override_dev_spec(spec)
info_space.tick('trial')
Trial(spec, info_space).run()
else:
raise ValueError(f'Unrecognizable lab_mode not of {TRAIN_MODES}')


def run_old_mode(spec_file, spec_name, lab_mode):
'''Run using existing data with `enjoy, eval`. The eval mode is also what train mode's online eval runs in a subprocess via bash command'''
# reconstruct spec and info_space from existing data
lab_mode, prename = lab_mode.split('@')
predir, _, _, _, _, _ = util.prepath_split(spec_file)
prepath = f'{predir}/{prename}'
spec, info_space = util.prepath_to_spec_info_space(prepath)
# see InfoSpace def for more on these
info_space.ckpt = 'eval'
info_space.eval_model_prepath = prepath

# no info_space.tick() as they are reconstructed
if lab_mode == 'enjoy':
def run_spec(spec, lab_mode):
'''Run a spec in lab_mode'''
os.environ['lab_mode'] = lab_mode
if lab_mode in TRAIN_MODES:
spec_util.save(spec) # first save the new spec
if lab_mode == 'dev':
spec = spec_util.override_dev_spec(spec)
if lab_mode == 'search':
spec_util.tick(spec, 'experiment')
Experiment(spec).run()
else:
spec_util.tick(spec, 'trial')
Trial(spec).run()
elif lab_mode in EVAL_MODES:
spec = spec_util.override_enjoy_spec(spec)
Session(spec, info_space).run()
elif lab_mode == 'eval':
# example eval command:
# python run_lab.py data/dqn_cartpole_2018_12_19_224811/dqn_cartpole_t0_spec.json dqn_cartpole eval@dqn_cartpole_t0_s1_ckpt-epi10-totalt1000
spec = spec_util.override_eval_spec(spec)
Session(spec, info_space).run()
util.clear_periodic_ckpt(prepath) # cleanup after itself
retro_analysis.analyze_eval_trial(spec, info_space, predir)
Session(spec).run()
else:
raise ValueError(f'Unrecognizable lab_mode not of {EVAL_MODES}')
raise ValueError(f'Unrecognizable lab_mode not of {TRAIN_MODES} or {EVAL_MODES}')


def run_by_mode(spec_file, spec_name, lab_mode):
'''The main run lab function for all lab_modes'''
logger.info(f'Running lab in mode: {lab_mode}')
# '@' is reserved for 'enjoy@{prename}'
os.environ['lab_mode'] = lab_mode.split('@')[0]
def read_spec_and_run(spec_file, spec_name, lab_mode):
'''Read a spec and run it in lab mode'''
logger.info(f'Running lab spec_file:{spec_file} spec_name:{spec_name} in mode:{lab_mode}')
if lab_mode in TRAIN_MODES:
run_new_mode(spec_file, spec_name, lab_mode)
else:
run_old_mode(spec_file, spec_name, lab_mode)
spec = spec_util.get(spec_file, spec_name)
else: # eval mode
lab_mode, prename = lab_mode.split('@')
spec = spec_util.get_eval_spec(spec_file, prename)

if 'spec_params' not in spec:
run_spec(spec, lab_mode)
else: # spec is parametrized; run them in parallel
param_specs = spec_util.get_param_specs(spec)
num_pro = spec['meta']['param_spec_process']
# can't use Pool since it cannot spawn nested Process, which is needed for VecEnv and parallel sessions. So these will run and wait by chunks
workers = [mp.Process(target=run_spec, args=(spec, lab_mode)) for spec in param_specs]
for chunk_w in ps.chunk(workers, num_pro):
for w in chunk_w:
w.start()
for w in chunk_w:
w.join()


def main():
if len(sys.argv) > 1:
args = sys.argv[1:]
'''Main method to run jobs from scheduler or from a spec directly'''
args = sys.argv[1:]
if len(args) <= 1: # use scheduler
job_file = args[0] if len(args) == 1 else 'job/experiments.json'
for spec_file, spec_and_mode in util.read(job_file).items():
for spec_name, lab_mode in spec_and_mode.items():
read_spec_and_run(spec_file, spec_name, lab_mode)
else: # run single spec
assert len(args) == 3, f'To use sys args, specify spec_file, spec_name, lab_mode'
run_by_mode(*args)
return

experiments = util.read('config/experiments.json')
for spec_file in experiments:
for spec_name, lab_mode in experiments[spec_file].items():
run_by_mode(spec_file, spec_name, lab_mode)
read_spec_and_run(*args)


if __name__ == '__main__':
torch.set_num_threads(1) # prevent multithread slowdown
mp.set_start_method('spawn') # for distributed pytorch to work
if sys.platform == 'darwin':
# avoid xvfb for MacOS: https://github.com/nipy/nipype/issues/1400
# avoid xvfb on MacOS: https://github.com/nipy/nipype/issues/1400
main()
else:
with Xvfb() as xvfb: # safety context for headless machines
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def run_tests(self):

setup(
name='slm_lab',
version='3.0.0',
version='4.0.0',
description='Modular Deep Reinforcement Learning framework in PyTorch.',
long_description='https://github.com/kengz/slm_lab',
keywords='SLM Lab',
Expand Down
Loading

0 comments on commit ac2e5b3

Please sign in to comment.