diff --git a/slm_lab/agent/net/conv.py b/slm_lab/agent/net/conv.py index 2e05133d0..b6e787ac6 100644 --- a/slm_lab/agent/net/conv.py +++ b/slm_lab/agent/net/conv.py @@ -31,6 +31,7 @@ class ConvNet(Net, nn.Module): ], "fc_hid_layers": [512], "hid_layers_activation": "relu", + "out_layer_activation": "tanh", "init_fn": null, "batch_norm": false, "clip_grad_val": 1.0, @@ -56,11 +57,13 @@ class ConvNet(Net, nn.Module): def __init__(self, net_spec, in_dim, out_dim): ''' net_spec: - conv_hid_layers: list containing dimensions of the convolutional hidden layers. Asssumed to all come before the flat layers. + conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list representing hid_layer = out_d, kernel, stride, padding, dilation. + Asssumed to all come before the flat layers. Note: a convolutional layer should specify the in_channel, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points) E.g. [3, 16, (5, 5), 1, 0, (2, 2)] For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md fc_hid_layers: list of fc layers following the convolutional layers hid_layers_activation: activation function for the hidden layers + out_layer_activation: activation function for the output layer, same shape as out_dim init_fn: weight initialization function batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer. 
clip_grad_val: clip gradient norm if value is not None @@ -77,6 +80,7 @@ def __init__(self, net_spec, in_dim, out_dim): super(ConvNet, self).__init__(net_spec, in_dim, out_dim) # set default util.set_attr(self, dict( + out_layer_activation=None, init_fn=None, batch_norm=True, clip_grad_val=None, @@ -92,6 +96,7 @@ def __init__(self, net_spec, in_dim, out_dim): 'conv_hid_layers', 'fc_hid_layers', 'hid_layers_activation', + 'out_layer_activation', 'init_fn', 'batch_norm', 'clip_grad_val', @@ -104,23 +109,30 @@ def __init__(self, net_spec, in_dim, out_dim): 'gpu', ]) - # conv layer + # conv body self.conv_model = self.build_conv_layers(self.conv_hid_layers) self.conv_out_dim = self.get_conv_output_size() - # fc layer - if not ps.is_empty(self.fc_hid_layers): - # fc layer from flattened conv - self.fc_model = self.build_fc_layers(self.fc_hid_layers) - tail_in_dim = self.fc_hid_layers[-1] - else: + # fc body + if ps.is_empty(self.fc_hid_layers): tail_in_dim = self.conv_out_dim + else: + # fc body from flattened conv + self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation) + tail_in_dim = self.fc_hid_layers[-1] # tails. 
avoid list for single-tail for compute speed if ps.is_integer(self.out_dim): - self.model_tail = nn.Linear(tail_in_dim, self.out_dim) + self.model_tail = net_util.build_fc_model([tail_in_dim, self.out_dim], self.out_layer_activation) else: - self.model_tails = nn.ModuleList([nn.Linear(tail_in_dim, out_d) for out_d in self.out_dim]) + if not ps.is_list(self.out_layer_activation): + self.out_layer_activation = [self.out_layer_activation] * len(out_dim) + assert len(self.out_layer_activation) == len(self.out_dim) + tails = [] + for out_d, out_activ in zip(self.out_dim, self.out_layer_activation): + tail = net_util.build_fc_model([tail_in_dim, out_d], out_activ) + tails.append(tail) + self.model_tails = nn.ModuleList(tails) net_util.init_layers(self, self.init_fn) for module in self.modules(): @@ -157,15 +169,6 @@ def build_conv_layers(self, conv_hid_layers): conv_model = nn.Sequential(*conv_layers) return conv_model - def build_fc_layers(self, fc_hid_layers): - ''' - Builds all of the fc layers in the network and store in a Sequential model - ''' - assert not ps.is_empty(fc_hid_layers) - dims = [self.conv_out_dim] + fc_hid_layers - fc_model = net_util.build_fc_model(dims, self.hid_layers_activation) - return fc_model - def forward(self, x): ''' The feedforward step @@ -292,21 +295,22 @@ def __init__(self, net_spec, in_dim, out_dim): # Guard against inappropriate algorithms and environments assert isinstance(out_dim, int) - # conv layer + # conv body self.conv_model = self.build_conv_layers(self.conv_hid_layers) self.conv_out_dim = self.get_conv_output_size() - # fc layer - if not ps.is_empty(self.fc_hid_layers): + # fc body + if ps.is_empty(self.fc_hid_layers): + tail_in_dim = self.conv_out_dim + else: # fc layer from flattened conv - self.fc_model = self.build_fc_layers(self.fc_hid_layers) + self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation) tail_in_dim = self.fc_hid_layers[-1] - else: - tail_in_dim = 
self.conv_out_dim # tails. avoid list for single-tail for compute speed self.v = nn.Linear(tail_in_dim, 1) # state value - self.adv = nn.Linear(tail_in_dim, out_dim[0]) # action dependent raw advantage + self.adv = nn.Linear(tail_in_dim, out_dim) # action dependent raw advantage + self.model_tails = nn.ModuleList([self.v, self.adv]) net_util.init_layers(self, self.init_fn) for module in self.modules(): diff --git a/slm_lab/agent/net/mlp.py b/slm_lab/agent/net/mlp.py index 50c72eac3..8a015593a 100644 --- a/slm_lab/agent/net/mlp.py +++ b/slm_lab/agent/net/mlp.py @@ -20,6 +20,7 @@ class MLPNet(Net, nn.Module): "shared": true, "hid_layers": [32], "hid_layers_activation": "relu", + "out_layer_activation": null, "init_fn": "xavier_uniform_", "clip_grad_val": 1.0, "loss_spec": { @@ -46,6 +47,7 @@ def __init__(self, net_spec, in_dim, out_dim): net_spec: hid_layers: list containing dimensions of the hidden layers hid_layers_activation: activation function for the hidden layers + out_layer_activation: activation function for the output layer, same shape as out_dim init_fn: weight initialization function clip_grad_val: clip gradient norm if value is not None loss_spec: measure of error between model predictions and correct outputs @@ -60,6 +62,7 @@ def __init__(self, net_spec, in_dim, out_dim): super(MLPNet, self).__init__(net_spec, in_dim, out_dim) # set default util.set_attr(self, dict( + out_layer_activation=None, init_fn=None, clip_grad_val=None, loss_spec={'name': 'MSELoss'}, @@ -74,6 +77,7 @@ def __init__(self, net_spec, in_dim, out_dim): 'shared', 'hid_layers', 'hid_layers_activation', + 'out_layer_activation', 'init_fn', 'clip_grad_val', 'loss_spec', @@ -90,9 +94,16 @@ def __init__(self, net_spec, in_dim, out_dim): # add last layer with no activation # tails.
avoid list for single-tail for compute speed if ps.is_integer(self.out_dim): - self.model_tail = nn.Linear(dims[-1], self.out_dim) + self.model_tail = net_util.build_fc_model([dims[-1], self.out_dim], self.out_layer_activation) else: - self.model_tails = nn.ModuleList([nn.Linear(dims[-1], out_d) for out_d in self.out_dim]) + if not ps.is_list(self.out_layer_activation): + self.out_layer_activation = [self.out_layer_activation] * len(out_dim) + assert len(self.out_layer_activation) == len(self.out_dim) + tails = [] + for out_d, out_activ in zip(self.out_dim, self.out_layer_activation): + tail = net_util.build_fc_model([dims[-1], out_d], out_activ) + tails.append(tail) + self.model_tails = nn.ModuleList(tails) net_util.init_layers(self, self.init_fn) for module in self.modules(): @@ -160,6 +171,7 @@ class HydraMLPNet(Net, nn.Module): [] # tail, no hidden layers ], "hid_layers_activation": "relu", + "out_layer_activation": null, "init_fn": "xavier_uniform_", "clip_grad_val": 1.0, "loss_spec": { @@ -209,6 +221,7 @@ def __init__(self, net_spec, in_dim, out_dim): super(HydraMLPNet, self).__init__(net_spec, in_dim, out_dim) # set default util.set_attr(self, dict( + out_layer_activation=None, init_fn=None, clip_grad_val=None, loss_spec={'name': 'MSELoss'}, @@ -222,6 +235,7 @@ def __init__(self, net_spec, in_dim, out_dim): util.set_attr(self, self.net_spec, [ 'hid_layers', 'hid_layers_activation', + 'out_layer_activation', 'init_fn', 'clip_grad_val', 'loss_spec', @@ -247,7 +261,7 @@ def __init__(self, net_spec, in_dim, out_dim): heads_out_dim = np.sum([head_hid_layers[-1] for head_hid_layers in self.head_hid_layers]) dims = [heads_out_dim] + self.body_hid_layers self.model_body = net_util.build_fc_model(dims, self.hid_layers_activation) - self.model_tails = self.build_model_tails(out_dim) + self.model_tails = self.build_model_tails(self.out_dim, self.out_layer_activation) net_util.init_layers(self, self.init_fn) for module in self.modules(): @@ -269,18 +283,22 @@ def 
build_model_heads(self, in_dim): model_heads.append(model_head) return model_heads - def build_model_tails(self, out_dim): + def build_model_tails(self, out_dim, out_layer_activation): '''Build each model_tail. These are stored as Sequential models in model_tails''' + if not ps.is_list(out_layer_activation): + out_layer_activation = [out_layer_activation] * len(out_dim) model_tails = nn.ModuleList() if ps.is_empty(self.tail_hid_layers): - for out_d in out_dim: - model_tails.append(nn.Linear(self.body_hid_layers[-1], out_d)) + for out_d, out_activ in zip(out_dim, out_layer_activation): + tail = net_util.build_fc_model([self.body_hid_layers[-1], out_d], out_activ) + model_tails.append(tail) else: assert len(self.tail_hid_layers) == len(out_dim), 'Hydra tail hid_params inconsistent with number out dims' - for out_d, hid_layers in zip(out_dim, self.tail_hid_layers): + for out_d, out_activ, hid_layers in zip(out_dim, out_layer_activation, self.tail_hid_layers): dims = hid_layers model_tail = net_util.build_fc_model(dims, self.hid_layers_activation) - model_tail.add_module(str(len(model_tail)), nn.Linear(dims[-1], out_d)) + tail_out = net_util.build_fc_model([dims[-1], out_d], out_activ) + model_tail.add_module(str(len(model_tail)), tail_out) model_tails.append(model_tail) return model_tails diff --git a/slm_lab/agent/net/recurrent.py b/slm_lab/agent/net/recurrent.py index e332cb46a..e8af3ec73 100644 --- a/slm_lab/agent/net/recurrent.py +++ b/slm_lab/agent/net/recurrent.py @@ -27,6 +27,7 @@ class RecurrentNet(Net, nn.Module): "cell_type": "GRU", "fc_hid_layers": [], "hid_layers_activation": "relu", + "out_layer_activation": null, "rnn_hidden_size": 32, "rnn_num_layers": 1, "bidirectional": False, @@ -58,6 +59,7 @@ def __init__(self, net_spec, in_dim, out_dim): cell_type: any of RNN, LSTM, GRU fc_hid_layers: list of fc layers preceeding the RNN layers hid_layers_activation: activation function for the fc hidden layers + out_layer_activation: activation function for the 
output layer, same shape as out_dim rnn_hidden_size: rnn hidden_size rnn_num_layers: number of recurrent layers bidirectional: if RNN should be bidirectional @@ -76,6 +78,7 @@ def __init__(self, net_spec, in_dim, out_dim): super(RecurrentNet, self).__init__(net_spec, in_dim, out_dim) # set default util.set_attr(self, dict( + out_layer_activation=None, cell_type='GRU', rnn_num_layers=1, bidirectional=False, @@ -93,6 +96,7 @@ def __init__(self, net_spec, in_dim, out_dim): 'cell_type', 'fc_hid_layers', 'hid_layers_activation', + 'out_layer_activation', 'rnn_hidden_size', 'rnn_num_layers', 'bidirectional', @@ -107,13 +111,13 @@ def __init__(self, net_spec, in_dim, out_dim): 'polyak_coef', 'gpu', ]) - # fc layer: state processing model - if not ps.is_empty(self.fc_hid_layers): + # fc body: state processing model + if ps.is_empty(self.fc_hid_layers): + self.rnn_input_dim = self.in_dim + else: fc_dims = [self.in_dim] + self.fc_hid_layers self.fc_model = net_util.build_fc_model(fc_dims, self.hid_layers_activation) self.rnn_input_dim = fc_dims[-1] - else: - self.rnn_input_dim = self.in_dim # RNN model self.rnn_model = getattr(nn, net_util.get_nn_name(self.cell_type))( @@ -124,9 +128,16 @@ def __init__(self, net_spec, in_dim, out_dim): # tails. 
avoid list for single-tail for compute speed if ps.is_integer(self.out_dim): - self.model_tail = nn.Linear(self.rnn_hidden_size, self.out_dim) + self.model_tail = net_util.build_fc_model([self.rnn_hidden_size, self.out_dim], self.out_layer_activation) else: - self.model_tails = nn.ModuleList([nn.Linear(self.rnn_hidden_size, out_d) for out_d in self.out_dim]) + if not ps.is_list(self.out_layer_activation): + self.out_layer_activation = [self.out_layer_activation] * len(out_dim) + assert len(self.out_layer_activation) == len(self.out_dim) + tails = [] + for out_d, out_activ in zip(self.out_dim, self.out_layer_activation): + tail = net_util.build_fc_model([self.rnn_hidden_size, out_d], out_activ) + tails.append(tail) + self.model_tails = nn.ModuleList(tails) net_util.init_layers(self, self.init_fn) for module in self.modules(): diff --git a/slm_lab/env/wrapper.py b/slm_lab/env/wrapper.py index bfb9f65e0..9a88b4707 100644 --- a/slm_lab/env/wrapper.py +++ b/slm_lab/env/wrapper.py @@ -4,7 +4,6 @@ from collections import deque from gym import spaces from slm_lab.lib import util -import cv2 import gym import numpy as np @@ -21,7 +20,7 @@ def __init__(self, env, noop_max=30): assert env.unwrapped.get_action_meanings()[0] == 'NOOP' def reset(self, **kwargs): - ''' Do no-op action for a number of steps in [1, noop_max].''' + '''Do no-op action for a number of steps in [1, noop_max].''' self.env.reset(**kwargs) if self.override_num_noops is not None: noops = self.override_num_noops @@ -151,8 +150,8 @@ def __init__(self, env): def observation(self, frame): frame = util.transform_image(frame, method='openai') - frame = np.expand_dims(frame, -1) - frame = np.swapaxes(frame, 2, 0) + frame = np.transpose(frame) # reverses all axes + frame = np.expand_dims(frame, 0) return frame diff --git a/slm_lab/lib/util.py b/slm_lab/lib/util.py index 1c9b573ea..c7a5dc3d0 100644 --- a/slm_lab/lib/util.py +++ b/slm_lab/lib/util.py @@ -781,9 +781,20 @@ def transform_image(im, method='openai'): 
raise ValueError('method must be one of: nature, openai') -def debug_image(im): - '''Use this method to render image the agent sees; waits for a key press before continuing''' - cv2.imshow('image', im) +def debug_image(im, is_chw=True): + ''' + Renders an image for debugging; pauses process until key press + Handles tensor/numpy and conventions among libraries + ''' + if torch.is_tensor(im): # if PyTorch tensor, get numpy + im = im.cpu().numpy() + if is_chw: # pytorch c,h,w convention + im = np.transpose(im) + im = im.astype(np.uint8) # typecast guard + if im.shape[-1] == 3: # RGB image + # accommodate from RGB (numpy) to BGR (cv2) + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + cv2.imshow('debug image', im) cv2.waitKey(0)