Merge pull request #300 from kengz/net-out-activ
Add out layer activation to net module
kengz authored Apr 17, 2019
2 parents 92af65b + 2d27f5b commit f8567e3
Showing 5 changed files with 90 additions and 47 deletions.
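
In spec terms, the change adds an out_layer_activation key alongside hid_layers_activation in every net class. A hypothetical spec fragment (key names follow the diffs below; values are illustrative, not from this commit):

# Hypothetical net spec fragment; values are illustrative.
net_spec = {
    "type": "MLPNet",
    "hid_layers": [64, 64],
    "hid_layers_activation": "relu",
    # one activation name for a single-tail net (or None for a plain linear output);
    # multi-tail nets may pass a list with one entry per output head
    "out_layer_activation": "tanh",
}
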
56 changes: 30 additions & 26 deletions slm_lab/agent/net/conv.py
@@ -31,6 +31,7 @@ class ConvNet(Net, nn.Module):
],
"fc_hid_layers": [512],
"hid_layers_activation": "relu",
"out_layer_activation": "tanh",
"init_fn": null,
"batch_norm": false,
"clip_grad_val": 1.0,
@@ -56,11 +57,13 @@ class ConvNet(Net, nn.Module):
def __init__(self, net_spec, in_dim, out_dim):
'''
net_spec:
conv_hid_layers: list containing dimensions of the convolutional hidden layers. Assumed to all come before the flat layers.
conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list representing hid_layer = out_d, kernel, stride, padding, dilation.
Assumed to all come before the flat layers.
Note: a convolutional layer should specify the in_channel, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points) E.g. [3, 16, (5, 5), 1, 0, (2, 2)]
For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
fc_hid_layers: list of fc layers following the convolutional layers
hid_layers_activation: activation function for the hidden layers
out_layer_activation: activation function for the output layer, same shape as out_dim
init_fn: weight initialization function
batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer.
clip_grad_val: clip gradient norm if value is not None
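
As a reading aid (not part of this commit): a single conv_hid_layers entry such as the [3, 16, (5, 5), 1, 0, (2, 2)] example above presumably maps onto torch.nn.Conv2d arguments roughly as sketched here; the repository's build_conv_layers may differ in its details.

import torch.nn as nn

# Sketch only: mapping the 6-element example
# [in_channels, out_channels, kernel_size, stride, padding, dilation]
layer_spec = [3, 16, (5, 5), 1, 0, (2, 2)]
conv = nn.Conv2d(
    in_channels=layer_spec[0],
    out_channels=layer_spec[1],
    kernel_size=layer_spec[2],
    stride=layer_spec[3],
    padding=layer_spec[4],
    dilation=layer_spec[5],
)
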
@@ -77,6 +80,7 @@ def __init__(self, net_spec, in_dim, out_dim):
super(ConvNet, self).__init__(net_spec, in_dim, out_dim)
# set default
util.set_attr(self, dict(
out_layer_activation=None,
init_fn=None,
batch_norm=True,
clip_grad_val=None,
@@ -92,6 +96,7 @@ def __init__(self, net_spec, in_dim, out_dim):
'conv_hid_layers',
'fc_hid_layers',
'hid_layers_activation',
'out_layer_activation',
'init_fn',
'batch_norm',
'clip_grad_val',
@@ -104,23 +109,30 @@ def __init__(self, net_spec, in_dim, out_dim):
'gpu',
])

# conv layer
# conv body
self.conv_model = self.build_conv_layers(self.conv_hid_layers)
self.conv_out_dim = self.get_conv_output_size()

# fc layer
if not ps.is_empty(self.fc_hid_layers):
# fc layer from flattened conv
self.fc_model = self.build_fc_layers(self.fc_hid_layers)
tail_in_dim = self.fc_hid_layers[-1]
else:
# fc body
if ps.is_empty(self.fc_hid_layers):
tail_in_dim = self.conv_out_dim
else:
# fc body from flattened conv
self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation)
tail_in_dim = self.fc_hid_layers[-1]

# tails. avoid list for single-tail for compute speed
if ps.is_integer(self.out_dim):
self.model_tail = nn.Linear(tail_in_dim, self.out_dim)
self.model_tail = net_util.build_fc_model([tail_in_dim, self.out_dim], self.out_layer_activation)
else:
self.model_tails = nn.ModuleList([nn.Linear(tail_in_dim, out_d) for out_d in self.out_dim])
if not ps.is_list(self.out_layer_activation):
self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
assert len(self.out_layer_activation) == len(self.out_dim)
tails = []
for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
tail = net_util.build_fc_model([tail_in_dim, out_d], out_activ)
tails.append(tail)
self.model_tails = nn.ModuleList(tails)

net_util.init_layers(self, self.init_fn)
for module in self.modules():
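
The single- and multi-tail branches above both delegate to net_util.build_fc_model. A minimal sketch of what such a helper plausibly does, assuming it chains Linear layers over dims and appends the named activation after each one (the repository's implementation, e.g. its activation-name resolution, may differ):

import torch.nn as nn

ACTIVATIONS = {'relu': nn.ReLU, 'tanh': nn.Tanh, 'sigmoid': nn.Sigmoid}  # illustrative subset

def build_fc_model_sketch(dims, activation=None):
    '''Chain Linear layers over dims; append the named activation after each Linear when given.'''
    layers = []
    for in_d, out_d in zip(dims[:-1], dims[1:]):
        layers.append(nn.Linear(in_d, out_d))
        if activation is not None:
            layers.append(ACTIVATIONS[activation]())
    return nn.Sequential(*layers)

# e.g. a single tail: Linear(512, 4) followed by Tanh when out_layer_activation is 'tanh',
# or a bare Linear(512, 4) when it is None (the pre-existing behavior).
tail = build_fc_model_sketch([512, 4], 'tanh')
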
@@ -157,15 +169,6 @@ def build_conv_layers(self, conv_hid_layers):
conv_model = nn.Sequential(*conv_layers)
return conv_model

def build_fc_layers(self, fc_hid_layers):
'''
Builds all of the fc layers in the network and store in a Sequential model
'''
assert not ps.is_empty(fc_hid_layers)
dims = [self.conv_out_dim] + fc_hid_layers
fc_model = net_util.build_fc_model(dims, self.hid_layers_activation)
return fc_model

def forward(self, x):
'''
The feedforward step
@@ -292,21 +295,22 @@ def __init__(self, net_spec, in_dim, out_dim):
# Guard against inappropriate algorithms and environments
assert isinstance(out_dim, int)

# conv layer
# conv body
self.conv_model = self.build_conv_layers(self.conv_hid_layers)
self.conv_out_dim = self.get_conv_output_size()

# fc layer
if not ps.is_empty(self.fc_hid_layers):
# fc body
if ps.is_empty(self.fc_hid_layers):
tail_in_dim = self.conv_out_dim
else:
# fc layer from flattened conv
self.fc_model = self.build_fc_layers(self.fc_hid_layers)
self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation)
tail_in_dim = self.fc_hid_layers[-1]
else:
tail_in_dim = self.conv_out_dim

# tails. avoid list for single-tail for compute speed
self.v = nn.Linear(tail_in_dim, 1) # state value
self.adv = nn.Linear(tail_in_dim, out_dim[0]) # action dependent raw advantage
self.adv = nn.Linear(tail_in_dim, out_dim) # action dependent raw advantage
self.model_tails = nn.ModuleList([self.v, self.adv])

net_util.init_layers(self, self.init_fn)
for module in self.modules():
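
For context only: in the standard dueling-DQN formulation, the v and adv heads above are combined downstream into Q-values. A generic sketch of that aggregation (this commit does not touch the forward pass, so this is not the repository's code):

import torch

def dueling_aggregate(v, adv):
    # standard dueling combination: Q = V + A - mean(A)
    return v + adv - adv.mean(dim=-1, keepdim=True)

q = dueling_aggregate(torch.zeros(8, 1), torch.zeros(8, 4))  # shapes: (batch, 1) and (batch, num_actions)
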
34 changes: 26 additions & 8 deletions slm_lab/agent/net/mlp.py
@@ -20,6 +20,7 @@ class MLPNet(Net, nn.Module):
"shared": true,
"hid_layers": [32],
"hid_layers_activation": "relu",
"out_layer_activation": null,
"init_fn": "xavier_uniform_",
"clip_grad_val": 1.0,
"loss_spec": {
@@ -46,6 +47,7 @@ def __init__(self, net_spec, in_dim, out_dim):
net_spec:
hid_layers: list containing dimensions of the hidden layers
hid_layers_activation: activation function for the hidden layers
out_layer_activation: activation function for the output layer, same shape as out_dim
init_fn: weight initialization function
clip_grad_val: clip gradient norm if value is not None
loss_spec: measure of error between model predictions and correct outputs
@@ -60,6 +62,7 @@ def __init__(self, net_spec, in_dim, out_dim):
super(MLPNet, self).__init__(net_spec, in_dim, out_dim)
# set default
util.set_attr(self, dict(
out_layer_activation=None,
init_fn=None,
clip_grad_val=None,
loss_spec={'name': 'MSELoss'},
@@ -74,6 +77,7 @@ def __init__(self, net_spec, in_dim, out_dim):
'shared',
'hid_layers',
'hid_layers_activation',
'out_layer_activation',
'init_fn',
'clip_grad_val',
'loss_spec',
@@ -90,9 +94,16 @@ def __init__(self, net_spec, in_dim, out_dim):
# add last layer with no activation
# tails. avoid list for single-tail for compute speed
if ps.is_integer(self.out_dim):
self.model_tail = nn.Linear(dims[-1], self.out_dim)
self.model_tail = net_util.build_fc_model([dims[-1], self.out_dim], self.out_layer_activation)
else:
self.model_tails = nn.ModuleList([nn.Linear(dims[-1], out_d) for out_d in self.out_dim])
if not ps.is_list(self.out_layer_activation):
self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
assert len(self.out_layer_activation) == len(self.out_dim)
tails = []
for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
tail = net_util.build_fc_model([dims[-1], out_d], out_activ)
tails.append(tail)
self.model_tails = nn.ModuleList(tails)

net_util.init_layers(self, self.init_fn)
for module in self.modules():
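
The multi-tail branch above broadcasts a scalar (or None) out_layer_activation to every tail, so passing a list gives each head its own output activation. A self-contained sketch of the resulting heads (dimensions and activation names are hypothetical):

import torch.nn as nn

ACTIVATIONS = {'relu': nn.ReLU, 'tanh': nn.Tanh}  # illustrative subset
in_dim, out_dim = 64, [4, 1]
out_layer_activation = ['tanh', None]  # tanh on the first head, plain linear on the second
tails = nn.ModuleList()
for out_d, out_activ in zip(out_dim, out_layer_activation):
    layers = [nn.Linear(in_dim, out_d)]
    if out_activ is not None:
        layers.append(ACTIVATIONS[out_activ]())
    tails.append(nn.Sequential(*layers))
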
@@ -160,6 +171,7 @@ class HydraMLPNet(Net, nn.Module):
[] # tail, no hidden layers
],
"hid_layers_activation": "relu",
"out_layer_activation": null,
"init_fn": "xavier_uniform_",
"clip_grad_val": 1.0,
"loss_spec": {
@@ -209,6 +221,7 @@ def __init__(self, net_spec, in_dim, out_dim):
super(HydraMLPNet, self).__init__(net_spec, in_dim, out_dim)
# set default
util.set_attr(self, dict(
out_layer_activation=None,
init_fn=None,
clip_grad_val=None,
loss_spec={'name': 'MSELoss'},
@@ -222,6 +235,7 @@ def __init__(self, net_spec, in_dim, out_dim):
util.set_attr(self, self.net_spec, [
'hid_layers',
'hid_layers_activation',
'out_layer_activation',
'init_fn',
'clip_grad_val',
'loss_spec',
@@ -247,7 +261,7 @@ def __init__(self, net_spec, in_dim, out_dim):
heads_out_dim = np.sum([head_hid_layers[-1] for head_hid_layers in self.head_hid_layers])
dims = [heads_out_dim] + self.body_hid_layers
self.model_body = net_util.build_fc_model(dims, self.hid_layers_activation)
self.model_tails = self.build_model_tails(out_dim)
self.model_tails = self.build_model_tails(self.out_dim, self.out_layer_activation)

net_util.init_layers(self, self.init_fn)
for module in self.modules():
@@ -269,18 +283,22 @@ def build_model_heads(self, in_dim):
model_heads.append(model_head)
return model_heads

def build_model_tails(self, out_dim):
def build_model_tails(self, out_dim, out_layer_activation):
'''Build each model_tail. These are stored as Sequential models in model_tails'''
if not ps.is_list(out_layer_activation):
out_layer_activation = [out_layer_activation] * len(out_dim)
model_tails = nn.ModuleList()
if ps.is_empty(self.tail_hid_layers):
for out_d in out_dim:
model_tails.append(nn.Linear(self.body_hid_layers[-1], out_d))
for out_d, out_activ in zip(out_dim, out_layer_activation):
tail = net_util.build_fc_model([self.body_hid_layers[-1], out_d], out_activ)
model_tails.append(tail)
else:
assert len(self.tail_hid_layers) == len(out_dim), 'Hydra tail hid_params inconsistent with number out dims'
for out_d, hid_layers in zip(out_dim, self.tail_hid_layers):
for out_d, out_activ, hid_layers in zip(out_dim, out_layer_activation, self.tail_hid_layers):
dims = hid_layers
model_tail = net_util.build_fc_model(dims, self.hid_layers_activation)
model_tail.add_module(str(len(model_tail)), nn.Linear(dims[-1], out_d))
tail_out = net_util.build_fc_model([dims[-1], out_d], out_activ)
model_tail.add_module(str(len(model_tail)), tail_out)
model_tails.append(model_tail)
return model_tails
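
When tail_hid_layers is non-empty, each hydra tail built above stacks its hidden fc layers (using hid_layers_activation) and then the new output layer with that tail's own activation. A sketch of one such tail, with hypothetical sizes and activations:

import torch.nn as nn

# one hydra tail: hidden layer(s) with hid_layers_activation, then the output layer
# with this tail's out_layer_activation (sizes and names are hypothetical)
one_tail = nn.Sequential(
    nn.Linear(64, 32), nn.ReLU(),  # tail hidden layer
    nn.Linear(32, 4), nn.Tanh(),   # output layer for this tail
)
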

23 changes: 17 additions & 6 deletions slm_lab/agent/net/recurrent.py
@@ -27,6 +27,7 @@ class RecurrentNet(Net, nn.Module):
"cell_type": "GRU",
"fc_hid_layers": [],
"hid_layers_activation": "relu",
"out_layer_activation": null,
"rnn_hidden_size": 32,
"rnn_num_layers": 1,
"bidirectional": False,
@@ -58,6 +59,7 @@ def __init__(self, net_spec, in_dim, out_dim):
cell_type: any of RNN, LSTM, GRU
fc_hid_layers: list of fc layers preceding the RNN layers
hid_layers_activation: activation function for the fc hidden layers
out_layer_activation: activation function for the output layer, same shape as out_dim
rnn_hidden_size: rnn hidden_size
rnn_num_layers: number of recurrent layers
bidirectional: if RNN should be bidirectional
@@ -76,6 +78,7 @@ def __init__(self, net_spec, in_dim, out_dim):
super(RecurrentNet, self).__init__(net_spec, in_dim, out_dim)
# set default
util.set_attr(self, dict(
out_layer_activation=None,
cell_type='GRU',
rnn_num_layers=1,
bidirectional=False,
@@ -93,6 +96,7 @@ def __init__(self, net_spec, in_dim, out_dim):
'cell_type',
'fc_hid_layers',
'hid_layers_activation',
'out_layer_activation',
'rnn_hidden_size',
'rnn_num_layers',
'bidirectional',
@@ -107,13 +111,13 @@ def __init__(self, net_spec, in_dim, out_dim):
'polyak_coef',
'gpu',
])
# fc layer: state processing model
if not ps.is_empty(self.fc_hid_layers):
# fc body: state processing model
if ps.is_empty(self.fc_hid_layers):
self.rnn_input_dim = self.in_dim
else:
fc_dims = [self.in_dim] + self.fc_hid_layers
self.fc_model = net_util.build_fc_model(fc_dims, self.hid_layers_activation)
self.rnn_input_dim = fc_dims[-1]
else:
self.rnn_input_dim = self.in_dim

# RNN model
self.rnn_model = getattr(nn, net_util.get_nn_name(self.cell_type))(
@@ -124,9 +128,16 @@ def __init__(self, net_spec, in_dim, out_dim):

# tails. avoid list for single-tail for compute speed
if ps.is_integer(self.out_dim):
self.model_tail = nn.Linear(self.rnn_hidden_size, self.out_dim)
self.model_tail = net_util.build_fc_model([self.rnn_hidden_size, self.out_dim], self.out_layer_activation)
else:
self.model_tails = nn.ModuleList([nn.Linear(self.rnn_hidden_size, out_d) for out_d in self.out_dim])
if not ps.is_list(self.out_layer_activation):
self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
assert len(self.out_layer_activation) == len(self.out_dim)
tails = []
for out_d, out_activ in zip(self.out_dim, self.out_layer_activation):
tail = net_util.build_fc_model([self.rnn_hidden_size, out_d], out_activ)
tails.append(tail)
self.model_tails = nn.ModuleList(tails)

net_util.init_layers(self, self.init_fn)
for module in self.modules():
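
For reference, the cell_type lookup above presumably resolves to the matching torch.nn recurrent class (RNN, LSTM, or GRU). A sketch under that assumption, with illustrative argument values:

import torch.nn as nn

cell_type = 'GRU'  # 'RNN' | 'LSTM' | 'GRU'
rnn_model = getattr(nn, cell_type)(
    input_size=32,       # rnn_input_dim: in_dim, or the last fc_hid_layers dim
    hidden_size=32,      # rnn_hidden_size
    num_layers=1,        # rnn_num_layers
    batch_first=True,    # assumed; not shown in the hunk above
    bidirectional=False,
)
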
7 changes: 3 additions & 4 deletions slm_lab/env/wrapper.py
@@ -4,7 +4,6 @@
from collections import deque
from gym import spaces
from slm_lab.lib import util
import cv2
import gym
import numpy as np

@@ -21,7 +20,7 @@ def __init__(self, env, noop_max=30):
assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

def reset(self, **kwargs):
''' Do no-op action for a number of steps in [1, noop_max].'''
'''Do no-op action for a number of steps in [1, noop_max].'''
self.env.reset(**kwargs)
if self.override_num_noops is not None:
noops = self.override_num_noops
@@ -151,8 +150,8 @@ def __init__(self, env):

def observation(self, frame):
frame = util.transform_image(frame, method='openai')
frame = np.expand_dims(frame, -1)
frame = np.swapaxes(frame, 2, 0)
frame = np.transpose(frame) # reverses all axes
frame = np.expand_dims(frame, 0)
return frame
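
Assuming transform_image returns a 2-D grayscale frame of shape (H, W), the old and new axis handling are equivalent: both yield a (1, W, H) array with identical values. A quick check:

import numpy as np

frame = np.arange(84 * 64).reshape(84, 64)          # hypothetical (H, W) frame
old = np.swapaxes(np.expand_dims(frame, -1), 2, 0)  # (H, W, 1) -> (1, W, H)
new = np.expand_dims(np.transpose(frame), 0)        # (W, H)    -> (1, W, H)
assert old.shape == new.shape == (1, 64, 84)
assert np.array_equal(old, new)
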


17 changes: 14 additions & 3 deletions slm_lab/lib/util.py
@@ -781,9 +781,20 @@ def transform_image(im, method='openai'):
raise ValueError('method must be one of: nature, openai')


def debug_image(im):
'''Use this method to render image the agent sees; waits for a key press before continuing'''
cv2.imshow('image', im)
def debug_image(im, is_chw=True):
'''
Renders an image for debugging; pauses process until key press
Handles tensor/numpy and conventions among libraries
'''
if torch.is_tensor(im): # if PyTorch tensor, get numpy
im = im.cpu().numpy()
if is_chw: # pytorch c,h,w convention
im = np.transpose(im)
im = im.astype(np.uint8) # typecast guard
if im.shape[0] == 3: # RGB image
# accommodate from RGB (numpy) to BGR (cv2)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
cv2.imshow('debug image', im)
cv2.waitKey(0)
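
A hypothetical usage of the updated helper, rendering a PyTorch-convention (C, H, W) frame; the call blocks until a key is pressed in the cv2 window:

import numpy as np
from slm_lab.lib.util import debug_image

frame = np.random.randint(0, 255, size=(3, 84, 84), dtype=np.uint8)  # illustrative frame
debug_image(frame, is_chw=True)
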


