Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ddpg in tensorflow #51

Closed
wants to merge 34 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6b7f6fb
Add window title parameter to MjViewer
ryanjulian Feb 8, 2018
3ae055a
Set window context before rendering in MjViewer
ryanjulian Feb 8, 2018
4eb1e16
Remove chainer from enviroment.yml
ryanjulian Mar 9, 2018
1f3f06a
[tf] Symbolic entropy for DiagonalGaussian
ryanjulian Mar 20, 2018
4fa6bff
TravisCI support
ryanjulian Mar 25, 2018
4694327
[travisci] Correct changed file detection
ryanjulian Mar 27, 2018
2cf515c
Protect StatefulPool from class methods
ryanjulian Mar 27, 2018
c7ea99f
add tensorboard
cjcchen Mar 31, 2018
4cfff58
fix pull request
cjcchen Mar 31, 2018
b152895
yapf format
cjcchen Mar 31, 2018
c3caaf8
remove dm_control deps
cjcchen Mar 31, 2018
0b2776b
Add TensorBoard Support
cjcchen Mar 31, 2018
b220f58
fix set tenworboard dir
cjcchen Mar 31, 2018
dbcc665
add histogram
cjcchen Apr 2, 2018
0ec372f
remove unused import
cjcchen Apr 5, 2018
b07c6e4
Merge branch 'integration' into tensorboard_histogram
cjcchen Apr 5, 2018
ea6341f
format
cjcchen Apr 5, 2018
b4fee90
format
cjcchen Apr 5, 2018
fbd3c01
format
cjcchen Apr 5, 2018
ff7bcb5
set private para
cjcchen Apr 5, 2018
9e08f2b
reformat
cjcchen Apr 6, 2018
54dc01a
reformat
cjcchen Apr 6, 2018
56f7800
add ddpg
cjcchen Apr 14, 2018
9157bf0
add example for ddpg
cjcchen Apr 14, 2018
9afa1b1
add an example for ddpg
cjcchen Apr 14, 2018
d9db1d5
remove unused import
cjcchen Apr 14, 2018
7b5d800
remove unused line
cjcchen Apr 14, 2018
a5f8f6e
remove unused line
cjcchen Apr 14, 2018
244dbe2
remove unused line
cjcchen Apr 14, 2018
6e888f1
add check point save
cjcchen Apr 14, 2018
996c4d7
add check point save
cjcchen Apr 14, 2018
393f0e8
Merge branch 'tensorboard_histogram' of https://github.com/cjcchen/rl…
cjcchen May 1, 2018
eacb3f5
add param desc
cjcchen May 8, 2018
27dfbf7
format
cjcchen May 8, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
language: python

python:
- "3.5.2"

install:
- pip install yapf

git:
depth: false

script:
- scripts/travisci/check_yapf.sh

notifications:
email: false
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ dependencies:
- git+https://github.com/neocxi/prettytensor.git
- jupyter
- progressbar2
- chainer==1.18.0
- https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.0.1-cp35-cp35m-linux_x86_64.whl; 'linux' in sys_platform
- https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow_gpu-1.0.1-py3-none-any.whl; sys_platform == 'darwin'
- numpy-stl==2.2.0
- nibabel==2.1.0
- pylru==1.0.9
- hyperopt
- polling
- tensorboard
27 changes: 27 additions & 0 deletions examples/ddpg_gym_tf_pendulum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from sandbox.rocky.tf.algos.ddpg.ddpg import DDPG
from sandbox.rocky.tf.algos.ddpg.noise import OrnsteinUhlenbeckActionNoise

import gym
import numpy as np
import tensorflow as tf

# Seed NumPy, TensorFlow and the gym environment with the same value.
RANDOM_SEED = 1234
np.random.seed(RANDOM_SEED)
tf.set_random_seed(RANDOM_SEED)

env = gym.make('Pendulum-v0')
env.seed(RANDOM_SEED)

# Ornstein-Uhlenbeck action noise with one component per action dimension:
# zero mean and a constant sigma of 0.02.
action_dim = env.action_space.shape[-1]
noise_mean = np.zeros(action_dim)
noise_sigma = np.full(action_dim, 0.02)
action_noise = OrnsteinUhlenbeckActionNoise(mu=noise_mean, sigma=noise_sigma)

ddpg = DDPG(
    env,
    plot=False,
    action_noise=action_noise,
    check_point_dir='pendulum',
    log_dir="pendulum_ou_noise",
)
ddpg.train()
11 changes: 11 additions & 0 deletions examples/example_tensorboard_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from rllab.misc import logger

import numpy as np

# Send TensorBoard output to ./test before anything is recorded.
logger.set_tensorboard_dir("./test")

for step in range(100):
    # 3x3x3 block of Gaussian samples whose scale equals the loop index.
    samples = np.random.normal(loc=0, scale=step, size=(3, 3, 3))
    logger.record_tabular("app", step)
    logger.record_histogram("gass", samples)
    logger.dump_tabular()
67 changes: 56 additions & 11 deletions rllab/misc/logger.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from enum import Enum

from rllab.misc.tabulate import tabulate
from rllab.misc.console import mkdir_p, colorize
from rllab.misc.console import mkdir_p
from rllab.misc.console import colorize
from rllab.misc.autoargs import get_all_parameters
from rllab.misc.tensorboard_summary import Summary

import tensorflow as tf

from contextlib import contextmanager
from enum import Enum
import numpy as np
import os

import os.path as osp
import os
import sys
import datetime
import dateutil.tz
Expand All @@ -31,13 +36,17 @@
_tabular_fds = {}
_tabular_header_written = set()

_tensorboard_writer = None
_snapshot_dir = None
_snapshot_mode = 'all'
_snapshot_gap = 1

_log_tabular_only = False
_header_printed = False

_tensorboard_step_key = None
_tensorboard = Summary()


def _add_output(file_name, arr, fds, mode='a'):
if file_name not in arr:
Expand Down Expand Up @@ -77,6 +86,10 @@ def remove_tabular_output(file_name):
_remove_output(file_name, _tabular_outputs, _tabular_fds)


def set_tensorboard_dir(dir_name):
    """Forward ``dir_name`` to the module-level TensorBoard summary.

    This is a thin wrapper around ``_tensorboard.set_dir``.
    """
    _tensorboard.set_dir(dir_name)


def set_snapshot_dir(dir_name):
global _snapshot_dir
_snapshot_dir = dir_name
Expand All @@ -94,18 +107,26 @@ def set_snapshot_mode(mode):
global _snapshot_mode
_snapshot_mode = mode


def get_snapshot_gap():
    """Return the current value of the module-level ``_snapshot_gap``."""
    return _snapshot_gap


def set_snapshot_gap(gap):
    """Store ``gap`` in the module-level ``_snapshot_gap``."""
    global _snapshot_gap
    _snapshot_gap = gap


def set_log_tabular_only(log_tabular_only):
    """Store ``log_tabular_only`` in the module-level ``_log_tabular_only``."""
    global _log_tabular_only
    _log_tabular_only = log_tabular_only


def set_tensorboard_step_key(key):
    """Store ``key`` in the module-level ``_tensorboard_step_key``.

    ``dump_tensorboard`` consults this key when choosing the step to write.
    """
    global _tensorboard_step_key
    _tensorboard_step_key = key


def get_log_tabular_only():
    """Return the current value of the module-level ``_log_tabular_only``."""
    return _log_tabular_only

Expand All @@ -130,9 +151,14 @@ def log(s, with_prefix=True, with_timestamp=True, color=None):


def record_tabular(key, val):
    """Record ``val`` under ``key`` for TensorBoard and for the tabular log.

    The TensorBoard scalar is tagged with the bare key; the tabular entry is
    stored under the current tabular prefix plus the key, with the value
    converted to a string.
    """
    name = str(key)
    _tensorboard.record_scale(name, val)
    entry = (_tabular_prefix_str + name, str(val))
    _tabular.append(entry)


def record_histogram(key, val):
    """Hand ``val`` to the TensorBoard summary as a histogram named ``key``."""
    tag = str(key)
    _tensorboard.record_histogram(tag, val)


def push_tabular_prefix(key):
_tabular_prefixes.append(key)
global _tabular_prefix_str
Expand Down Expand Up @@ -186,6 +212,14 @@ def refresh(self):
table_printer = TerminalTablePrinter()


def dump_tensorboard(*args, **kwargs):
    """Write the pending TensorBoard records, optionally at a tabular step.

    When a step key has been configured with ``set_tensorboard_step_key`` and
    that key is present in the pending tabular records, its value is used as
    the step; otherwise ``None`` is passed and the summary object falls back
    to its own counter.

    ``*args`` and ``**kwargs`` are accepted for call compatibility and are
    ignored.

    NOTE(review): entries in ``_tabular`` are stored under the tabular prefix
    plus the key, so the step key must match that full name. The lookup only
    sees records that have not yet been cleared from ``_tabular`` -- confirm
    the call order in ``dump_tabular``.
    """
    step = None
    if _tensorboard_step_key:
        # The original read ``tabular_dict``, a local of dump_tabular that is
        # not in scope here and raised NameError; use the shared buffer.
        raw_step = dict(_tabular).get(_tensorboard_step_key)
        if raw_step is not None:
            try:
                # Tabular values are stored as strings (e.g. "3" or "3.0").
                step = int(float(raw_step))
            except (TypeError, ValueError, OverflowError):
                step = None

    _tensorboard.dump_tensorboard(step)


def dump_tabular(*args, **kwargs):
wh = kwargs.pop("write_header", None)
if len(_tabular) > 0:
Expand All @@ -195,17 +229,24 @@ def dump_tabular(*args, **kwargs):
for line in tabulate(_tabular).split('\n'):
log(line, *args, **kwargs)
tabular_dict = dict(_tabular)

# Also write to the csv files
# This assumes that the keys in each iteration won't change!
for tabular_fd in list(_tabular_fds.values()):
writer = csv.DictWriter(tabular_fd, fieldnames=list(tabular_dict.keys()))
if wh or (wh is None and tabular_fd not in _tabular_header_written):
writer = csv.DictWriter(
tabular_fd, fieldnames=list(tabular_dict.keys()))
if wh or (wh is None
and tabular_fd not in _tabular_header_written):
writer.writeheader()
_tabular_header_written.add(tabular_fd)
writer.writerow(tabular_dict)
tabular_fd.flush()
del _tabular[:]

# write to the tensorboard folder
# This assumes that the keys in each iteration won't change!
dump_tensorboard(args, kwargs)


def pop_prefix():
del _prefixes[-1]
Expand Down Expand Up @@ -245,7 +286,8 @@ def log_parameters(log_file, args, classes):
log_params[name] = params
else:
log_params[name] = getattr(cls, "__kwargs", dict())
log_params[name]["_name"] = cls.__module__ + "." + cls.__class__.__name__
log_params[name][
"_name"] = cls.__module__ + "." + cls.__class__.__name__
mkdir_p(os.path.dirname(log_file))
with open(log_file, "w") as f:
json.dump(log_params, f, indent=2, sort_keys=True)
Expand All @@ -258,13 +300,13 @@ def stub_to_json(stub_sth):
data = dict()
for k, v in stub_sth.kwargs.items():
data[k] = stub_to_json(v)
data["_name"] = stub_sth.proxy_class.__module__ + "." + stub_sth.proxy_class.__name__
data[
"_name"] = stub_sth.proxy_class.__module__ + "." + stub_sth.proxy_class.__name__
return data
elif isinstance(stub_sth, instrument.StubAttr):
return dict(
obj=stub_to_json(stub_sth.obj),
attr=stub_to_json(stub_sth.attr_name)
)
attr=stub_to_json(stub_sth.attr_name))
elif isinstance(stub_sth, instrument.StubMethodCall):
return dict(
obj=stub_to_json(stub_sth.obj),
Expand Down Expand Up @@ -294,7 +336,10 @@ def default(self, o):
if isinstance(o, type):
return {'$class': o.__module__ + "." + o.__name__}
elif isinstance(o, Enum):
return {'$enum': o.__module__ + "." + o.__class__.__name__ + '.' + o.name}
return {
'$enum':
o.__module__ + "." + o.__class__.__name__ + '.' + o.name
}
return json.JSONEncoder.default(self, o)


Expand Down
63 changes: 63 additions & 0 deletions rllab/misc/tensorboard_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from rllab.misc.console import mkdir_p

import tensorflow as tf
import os


class Summary:
    """Buffer scalar and histogram records and write them as TensorBoard events.

    Scalars accumulate in a ``tf.Summary`` protobuf. Histogram values are held
    in TensorFlow variables that live in a private graph and are evaluated by
    a private session, so they do not mix with the caller's default graph.
    Nothing is written to disk until ``set_dir`` has created a writer; until
    then, dumps only discard the pending scalars.
    """

    def __init__(self):
        self._summary_scale = tf.Summary()
        self._histogram_ds = {}
        self._histogram_summary_op = []
        # key -> (placeholder, assign op), built once per key so repeated
        # records do not keep adding nodes to the graph.
        self._histogram_assign = {}
        # Merged histogram summary op; rebuilt only after a new key appears.
        self._histogram_merged = None

        # Private graph: keeps the logger's variables out of the caller's
        # default graph and guarantees the session can run every op built here.
        self._graph = tf.Graph()
        self._session = tf.Session(graph=self._graph)

        self._default_step = 0
        self._step_key = None
        self._writer = None

    def record_histogram(self, key, val):
        """Store ``val`` as the current histogram data for ``key``.

        The first value recorded for a key fixes the variable's dtype and
        shape (it is created with ``tf.Variable(val)``).
        """
        key = str(key)
        if key not in self._histogram_ds:
            with self._graph.as_default():
                variable = tf.Variable(val)
                feed = tf.placeholder(
                    variable.dtype.base_dtype, shape=variable.get_shape())
                assign_op = tf.assign(variable, feed)
                self._histogram_summary_op.append(
                    tf.summary.histogram(key, variable))
            self._histogram_ds[key] = variable
            self._histogram_assign[key] = (feed, assign_op)
            self._histogram_merged = None

        feed, assign_op = self._histogram_assign[key]
        self._session.run(assign_op, feed_dict={feed: val})

    def record_scale(self, key, val):
        """Queue ``val`` as a scalar tagged ``key``.

        Values that cannot be converted with ``float()`` are skipped rather
        than raising, so non-numeric tabular entries do not abort logging.
        """
        try:
            scalar = float(val)
        except (TypeError, ValueError):
            return
        self._summary_scale.value.add(tag=str(key), simple_value=scalar)

    def dump_tensorboard(self, step=None):
        """Write all pending records at ``step``.

        When ``step`` is ``None`` the internal counter is used and then
        advanced by one. An explicit step, including 0, is used as given and
        leaves the counter untouched.
        """
        if step is None:
            run_step = self._default_step
            self._default_step += 1
        else:
            run_step = step

        self._dump_histogram(run_step)
        self._dump_scale(run_step)

    def set_dir(self, dir_name):
        """Start writing events into ``dir_name``, or stop if it is falsy.

        Any writer that is already open is closed first. Opening a new writer
        resets the internal step counter to 0.
        """
        if self._writer is not None:
            self._writer.close()
            self._writer = None
        if not dir_name:
            return

        # os.path.dirname returns '' for a bare directory name; there is no
        # parent to create in that case.
        parent = os.path.dirname(dir_name)
        if parent:
            mkdir_p(parent)
        self._writer = tf.summary.FileWriter(dir_name)
        self._default_step = 0
        print("tensorboard data will be logged into:", dir_name)

    def _dump_histogram(self, step):
        """Evaluate every histogram summary and write the result at ``step``."""
        if self._writer is None or not self._histogram_summary_op:
            return
        if self._histogram_merged is None:
            with self._graph.as_default():
                self._histogram_merged = tf.summary.merge(
                    self._histogram_summary_op)
        summary_str = self._session.run(self._histogram_merged)
        self._writer.add_summary(summary_str, global_step=step)
        self._writer.flush()

    def _dump_scale(self, step):
        """Write the queued scalars at ``step`` and clear the queue."""
        if self._writer is not None:
            self._writer.add_summary(self._summary_scale, step)
            self._writer.flush()
        # Always clear, even without a writer, so scalars do not pile up.
        del self._summary_scale.value[:]
Loading