From a3eb66c2ae728b58909e86f814491cee69353c17 Mon Sep 17 00:00:00 2001 From: kengz Date: Tue, 16 Apr 2019 23:06:37 -0700 Subject: [PATCH 1/5] fix gym wrapper transpose --- slm_lab/env/wrapper.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/slm_lab/env/wrapper.py b/slm_lab/env/wrapper.py index bfb9f65e0..a3c006d05 100644 --- a/slm_lab/env/wrapper.py +++ b/slm_lab/env/wrapper.py @@ -4,7 +4,6 @@ from collections import deque from gym import spaces from slm_lab.lib import util -import cv2 import gym import numpy as np @@ -21,7 +20,7 @@ def __init__(self, env, noop_max=30): assert env.unwrapped.get_action_meanings()[0] == 'NOOP' def reset(self, **kwargs): - ''' Do no-op action for a number of steps in [1, noop_max].''' + '''Do no-op action for a number of steps in [1, noop_max].''' self.env.reset(**kwargs) if self.override_num_noops is not None: noops = self.override_num_noops @@ -151,8 +150,8 @@ def __init__(self, env): def observation(self, frame): frame = util.transform_image(frame, method='openai') + frame = np.transpose(frame) # reverses all axes frame = np.expand_dims(frame, -1) - frame = np.swapaxes(frame, 2, 0) return frame From 3757d1b2fd5540502154340e31440b0ddea65dee Mon Sep 17 00:00:00 2001 From: kengz Date: Tue, 16 Apr 2019 23:06:56 -0700 Subject: [PATCH 2/5] generalize debug_image method in util --- slm_lab/lib/util.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/slm_lab/lib/util.py b/slm_lab/lib/util.py index 1c9b573ea..33ace3026 100644 --- a/slm_lab/lib/util.py +++ b/slm_lab/lib/util.py @@ -782,8 +782,17 @@ def transform_image(im, method='openai'): def debug_image(im): - '''Use this method to render image the agent sees; waits for a key press before continuing''' - cv2.imshow('image', im) + ''' + Renders an image for debugging; pauses process until key press + Handles tensor/numpy and different conventions among libraries + ''' + if torch.is_tensor(im): # if PyTorch tensor, get numpy + im = 
im.cpu().numpy() + if np.argmin(im.shape) == 0: # if channel-first, transpose all axes + im = np.transpose(im) + # typecast and accommodate from RGB (numpy) to BGR (cv2) + im = cv2.cvtColor(im.astype(np.uint8), cv2.COLOR_BGR2RGB) + cv2.imshow('debug image', im) cv2.waitKey(0) From 884541e1d29c82fcc37fc0e9c9d969a70eb67714 Mon Sep 17 00:00:00 2001 From: kengz Date: Wed, 17 Apr 2019 00:01:14 -0700 Subject: [PATCH 3/5] fix wrapper expanddims, generalize debug_image better --- slm_lab/env/wrapper.py | 2 +- slm_lab/lib/util.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/slm_lab/env/wrapper.py b/slm_lab/env/wrapper.py index a3c006d05..9a88b4707 100644 --- a/slm_lab/env/wrapper.py +++ b/slm_lab/env/wrapper.py @@ -151,7 +151,7 @@ def __init__(self, env): def observation(self, frame): frame = util.transform_image(frame, method='openai') frame = np.transpose(frame) # reverses all axes - frame = np.expand_dims(frame, -1) + frame = np.expand_dims(frame, 0) return frame diff --git a/slm_lab/lib/util.py b/slm_lab/lib/util.py index 33ace3026..c7a5dc3d0 100644 --- a/slm_lab/lib/util.py +++ b/slm_lab/lib/util.py @@ -781,17 +781,19 @@ def transform_image(im, method='openai'): raise ValueError('method must be one of: nature, openai') -def debug_image(im): +def debug_image(im, is_chw=True): ''' Renders an image for debugging; pauses process until key press - Handles tensor/numpy and different conventions among libraries + Handles tensor/numpy and conventions among libraries ''' if torch.is_tensor(im): # if PyTorch tensor, get numpy im = im.cpu().numpy() - if np.argmin(im.shape) == 0: # if channel-first, transpose all axes + if is_chw: # pytorch c,h,w convention im = np.transpose(im) - # typecast and accommodate from RGB (numpy) to BGR (cv2) - im = cv2.cvtColor(im.astype(np.uint8), cv2.COLOR_BGR2RGB) + im = im.astype(np.uint8) # typecast guard + if im.shape[0] == 3: # RGB image + # accommodate from RGB (numpy) to BGR (cv2) + im = cv2.cvtColor(im, 
cv2.COLOR_BGR2RGB) cv2.imshow('debug image', im) cv2.waitKey(0) From d4f36c2f11e20e6d5a7499cb9f717b03f5c1e787 Mon Sep 17 00:00:00 2001 From: kengz Date: Wed, 17 Apr 2019 00:27:38 -0700 Subject: [PATCH 4/5] add out_layer_activation to networks --- slm_lab/agent/net/conv.py | 56 ++++++++++++++++++---------------- slm_lab/agent/net/mlp.py | 32 ++++++++++++++----- slm_lab/agent/net/recurrent.py | 23 ++++++++++---- 3 files changed, 71 insertions(+), 40 deletions(-) diff --git a/slm_lab/agent/net/conv.py b/slm_lab/agent/net/conv.py index 2e05133d0..b6e787ac6 100644 --- a/slm_lab/agent/net/conv.py +++ b/slm_lab/agent/net/conv.py @@ -31,6 +31,7 @@ class ConvNet(Net, nn.Module): ], "fc_hid_layers": [512], "hid_layers_activation": "relu", + "out_layer_activation": "tanh", "init_fn": null, "batch_norm": false, "clip_grad_val": 1.0, @@ -56,11 +57,13 @@ class ConvNet(Net, nn.Module): def __init__(self, net_spec, in_dim, out_dim): ''' net_spec: - conv_hid_layers: list containing dimensions of the convolutional hidden layers. Asssumed to all come before the flat layers. + conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list representing hid_layer = out_d, kernel, stride, padding, dilation. + Assumed to all come before the flat layers. Note: a convolutional layer should specify the in_channel, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points) E.g. [3, 16, (5, 5), 1, 0, (2, 2)] For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md fc_hid_layers: list of fc layers following the convolutional layers hid_layers_activation: activation function for the hidden layers + out_layer_activation: activation function for the output layer, same shape as out_dim init_fn: weight initialization function batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer.
clip_grad_val: clip gradient norm if value is not None @@ -77,6 +80,7 @@ def __init__(self, net_spec, in_dim, out_dim): super(ConvNet, self).__init__(net_spec, in_dim, out_dim) # set default util.set_attr(self, dict( + out_layer_activation=None, init_fn=None, batch_norm=True, clip_grad_val=None, @@ -92,6 +96,7 @@ def __init__(self, net_spec, in_dim, out_dim): 'conv_hid_layers', 'fc_hid_layers', 'hid_layers_activation', + 'out_layer_activation', 'init_fn', 'batch_norm', 'clip_grad_val', @@ -104,23 +109,30 @@ def __init__(self, net_spec, in_dim, out_dim): 'gpu', ]) - # conv layer + # conv body self.conv_model = self.build_conv_layers(self.conv_hid_layers) self.conv_out_dim = self.get_conv_output_size() - # fc layer - if not ps.is_empty(self.fc_hid_layers): - # fc layer from flattened conv - self.fc_model = self.build_fc_layers(self.fc_hid_layers) - tail_in_dim = self.fc_hid_layers[-1] - else: + # fc body + if ps.is_empty(self.fc_hid_layers): tail_in_dim = self.conv_out_dim + else: + # fc body from flattened conv + self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation) + tail_in_dim = self.fc_hid_layers[-1] # tails. 
avoid list for single-tail for compute speed if ps.is_integer(self.out_dim): - self.model_tail = nn.Linear(tail_in_dim, self.out_dim) + self.model_tail = net_util.build_fc_model([tail_in_dim, self.out_dim], self.out_layer_activation) else: - self.model_tails = nn.ModuleList([nn.Linear(tail_in_dim, out_d) for out_d in self.out_dim]) + if not ps.is_list(self.out_layer_activation): + self.out_layer_activation = [self.out_layer_activation] * len(out_dim) + assert len(self.out_layer_activation) == len(self.out_dim) + tails = [] + for out_d, out_activ in zip(self.out_dim, self.out_layer_activation): + tail = net_util.build_fc_model([tail_in_dim, out_d], out_activ) + tails.append(tail) + self.model_tails = nn.ModuleList(tails) net_util.init_layers(self, self.init_fn) for module in self.modules(): @@ -157,15 +169,6 @@ def build_conv_layers(self, conv_hid_layers): conv_model = nn.Sequential(*conv_layers) return conv_model - def build_fc_layers(self, fc_hid_layers): - ''' - Builds all of the fc layers in the network and store in a Sequential model - ''' - assert not ps.is_empty(fc_hid_layers) - dims = [self.conv_out_dim] + fc_hid_layers - fc_model = net_util.build_fc_model(dims, self.hid_layers_activation) - return fc_model - def forward(self, x): ''' The feedforward step @@ -292,21 +295,22 @@ def __init__(self, net_spec, in_dim, out_dim): # Guard against inappropriate algorithms and environments assert isinstance(out_dim, int) - # conv layer + # conv body self.conv_model = self.build_conv_layers(self.conv_hid_layers) self.conv_out_dim = self.get_conv_output_size() - # fc layer - if not ps.is_empty(self.fc_hid_layers): + # fc body + if ps.is_empty(self.fc_hid_layers): + tail_in_dim = self.conv_out_dim + else: # fc layer from flattened conv - self.fc_model = self.build_fc_layers(self.fc_hid_layers) + self.fc_model = net_util.build_fc_model([self.conv_out_dim] + self.fc_hid_layers, self.hid_layers_activation) tail_in_dim = self.fc_hid_layers[-1] - else: - tail_in_dim = 
self.conv_out_dim # tails. avoid list for single-tail for compute speed self.v = nn.Linear(tail_in_dim, 1) # state value - self.adv = nn.Linear(tail_in_dim, out_dim[0]) # action dependent raw advantage + self.adv = nn.Linear(tail_in_dim, out_dim) # action dependent raw advantage + self.model_tails = nn.ModuleList(self.v, self.adv) net_util.init_layers(self, self.init_fn) for module in self.modules(): diff --git a/slm_lab/agent/net/mlp.py b/slm_lab/agent/net/mlp.py index 50c72eac3..396f43fd4 100644 --- a/slm_lab/agent/net/mlp.py +++ b/slm_lab/agent/net/mlp.py @@ -20,6 +20,7 @@ class MLPNet(Net, nn.Module): "shared": true, "hid_layers": [32], "hid_layers_activation": "relu", + "out_layer_activation": null, "init_fn": "xavier_uniform_", "clip_grad_val": 1.0, "loss_spec": { @@ -46,6 +47,7 @@ def __init__(self, net_spec, in_dim, out_dim): net_spec: hid_layers: list containing dimensions of the hidden layers hid_layers_activation: activation function for the hidden layers + out_layer_activation: activation function for the output layer, same shape as out_dim init_fn: weight initialization function clip_grad_val: clip gradient norm if value is not None loss_spec: measure of error between model predictions and correct outputs @@ -60,6 +62,7 @@ def __init__(self, net_spec, in_dim, out_dim): super(MLPNet, self).__init__(net_spec, in_dim, out_dim) # set default util.set_attr(self, dict( + out_layer_activation=None, init_fn=None, clip_grad_val=None, loss_spec={'name': 'MSELoss'}, @@ -74,6 +77,7 @@ def __init__(self, net_spec, in_dim, out_dim): 'shared', 'hid_layers', 'hid_layers_activation', + 'out_layer_activation', 'init_fn', 'clip_grad_val', 'loss_spec', @@ -90,9 +94,16 @@ def __init__(self, net_spec, in_dim, out_dim): # add last layer with no activation # tails. 
avoid list for single-tail for compute speed if ps.is_integer(self.out_dim): - self.model_tail = nn.Linear(dims[-1], self.out_dim) + self.model_tail = net_util.build_fc_model([dims[-1], self.out_dim], self.out_layer_activation) else: - self.model_tails = nn.ModuleList([nn.Linear(dims[-1], out_d) for out_d in self.out_dim]) + if not ps.is_list(self.out_layer_activation): + self.out_layer_activation = [self.out_layer_activation] * len(out_dim) + assert len(self.out_layer_activation) == len(self.out_dim) + tails = [] + for out_d, out_activ in zip(self.out_dim, self.out_layer_activation): + tail = net_util.build_fc_model([dims[-1], out_d], out_activ) + tails.append(tail) + self.model_tails = nn.ModuleList(tails) net_util.init_layers(self, self.init_fn) for module in self.modules(): @@ -160,6 +171,7 @@ class HydraMLPNet(Net, nn.Module): [] # tail, no hidden layers ], "hid_layers_activation": "relu", + "out_layer_activation": null, "init_fn": "xavier_uniform_", "clip_grad_val": 1.0, "loss_spec": { @@ -209,6 +221,7 @@ def __init__(self, net_spec, in_dim, out_dim): super(HydraMLPNet, self).__init__(net_spec, in_dim, out_dim) # set default util.set_attr(self, dict( + out_layer_activation=None, init_fn=None, clip_grad_val=None, loss_spec={'name': 'MSELoss'}, @@ -222,6 +235,7 @@ def __init__(self, net_spec, in_dim, out_dim): util.set_attr(self, self.net_spec, [ 'hid_layers', 'hid_layers_activation', + 'out_layer_activation', 'init_fn', 'clip_grad_val', 'loss_spec', @@ -247,7 +261,7 @@ def __init__(self, net_spec, in_dim, out_dim): heads_out_dim = np.sum([head_hid_layers[-1] for head_hid_layers in self.head_hid_layers]) dims = [heads_out_dim] + self.body_hid_layers self.model_body = net_util.build_fc_model(dims, self.hid_layers_activation) - self.model_tails = self.build_model_tails(out_dim) + self.model_tails = self.build_model_tails(self.out_dim, self.out_layer_activation) net_util.init_layers(self, self.init_fn) for module in self.modules(): @@ -269,18 +283,20 @@ def 
build_model_heads(self, in_dim): model_heads.append(model_head) return model_heads - def build_model_tails(self, out_dim): + def build_model_tails(self, out_dim, out_layer_activation): '''Build each model_tail. These are stored as Sequential models in model_tails''' model_tails = nn.ModuleList() if ps.is_empty(self.tail_hid_layers): - for out_d in out_dim: - model_tails.append(nn.Linear(self.body_hid_layers[-1], out_d)) + for out_d, out_activ in zip(out_dim, out_layer_activation): + tail = net_util.build_fc_model([self.body_hid_layers[-1], out_d], out_activ) + model_tails.append(tail) else: assert len(self.tail_hid_layers) == len(out_dim), 'Hydra tail hid_params inconsistent with number out dims' - for out_d, hid_layers in zip(out_dim, self.tail_hid_layers): + for out_d, out_activ, hid_layers in zip(out_dim, out_layer_activation, self.tail_hid_layers): dims = hid_layers model_tail = net_util.build_fc_model(dims, self.hid_layers_activation) - model_tail.add_module(str(len(model_tail)), nn.Linear(dims[-1], out_d)) + tail_out = net_util.build_fc_model([dims[-1], out_d], out_activ) + model_tail.add_module(str(len(model_tail)), tail_out) model_tails.append(model_tail) return model_tails diff --git a/slm_lab/agent/net/recurrent.py b/slm_lab/agent/net/recurrent.py index e332cb46a..e8af3ec73 100644 --- a/slm_lab/agent/net/recurrent.py +++ b/slm_lab/agent/net/recurrent.py @@ -27,6 +27,7 @@ class RecurrentNet(Net, nn.Module): "cell_type": "GRU", "fc_hid_layers": [], "hid_layers_activation": "relu", + "out_layer_activation": null, "rnn_hidden_size": 32, "rnn_num_layers": 1, "bidirectional": False, @@ -58,6 +59,7 @@ def __init__(self, net_spec, in_dim, out_dim): cell_type: any of RNN, LSTM, GRU fc_hid_layers: list of fc layers preceeding the RNN layers hid_layers_activation: activation function for the fc hidden layers + out_layer_activation: activation function for the output layer, same shape as out_dim rnn_hidden_size: rnn hidden_size rnn_num_layers: number of recurrent 
layers bidirectional: if RNN should be bidirectional @@ -76,6 +78,7 @@ def __init__(self, net_spec, in_dim, out_dim): super(RecurrentNet, self).__init__(net_spec, in_dim, out_dim) # set default util.set_attr(self, dict( + out_layer_activation=None, cell_type='GRU', rnn_num_layers=1, bidirectional=False, @@ -93,6 +96,7 @@ def __init__(self, net_spec, in_dim, out_dim): 'cell_type', 'fc_hid_layers', 'hid_layers_activation', + 'out_layer_activation', 'rnn_hidden_size', 'rnn_num_layers', 'bidirectional', @@ -107,13 +111,13 @@ def __init__(self, net_spec, in_dim, out_dim): 'polyak_coef', 'gpu', ]) - # fc layer: state processing model - if not ps.is_empty(self.fc_hid_layers): + # fc body: state processing model + if ps.is_empty(self.fc_hid_layers): + self.rnn_input_dim = self.in_dim + else: fc_dims = [self.in_dim] + self.fc_hid_layers self.fc_model = net_util.build_fc_model(fc_dims, self.hid_layers_activation) self.rnn_input_dim = fc_dims[-1] - else: - self.rnn_input_dim = self.in_dim # RNN model self.rnn_model = getattr(nn, net_util.get_nn_name(self.cell_type))( @@ -124,9 +128,16 @@ def __init__(self, net_spec, in_dim, out_dim): # tails. 
avoid list for single-tail for compute speed if ps.is_integer(self.out_dim): - self.model_tail = nn.Linear(self.rnn_hidden_size, self.out_dim) + self.model_tail = net_util.build_fc_model([self.rnn_hidden_size, self.out_dim], self.out_layer_activation) else: - self.model_tails = nn.ModuleList([nn.Linear(self.rnn_hidden_size, out_d) for out_d in self.out_dim]) + if not ps.is_list(self.out_layer_activation): + self.out_layer_activation = [self.out_layer_activation] * len(out_dim) + assert len(self.out_layer_activation) == len(self.out_dim) + tails = [] + for out_d, out_activ in zip(self.out_dim, self.out_layer_activation): + tail = net_util.build_fc_model([self.rnn_hidden_size, out_d], out_activ) + tails.append(tail) + self.model_tails = nn.ModuleList(tails) net_util.init_layers(self, self.init_fn) for module in self.modules(): From 2d27f5baabc8f423fc07a95129ee03dd4c058063 Mon Sep 17 00:00:00 2001 From: kengz Date: Wed, 17 Apr 2019 00:49:48 -0700 Subject: [PATCH 5/5] fix hydra --- slm_lab/agent/net/mlp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/slm_lab/agent/net/mlp.py b/slm_lab/agent/net/mlp.py index 396f43fd4..8a015593a 100644 --- a/slm_lab/agent/net/mlp.py +++ b/slm_lab/agent/net/mlp.py @@ -285,6 +285,8 @@ def build_model_heads(self, in_dim): def build_model_tails(self, out_dim, out_layer_activation): '''Build each model_tail. These are stored as Sequential models in model_tails''' + if not ps.is_list(out_layer_activation): + out_layer_activation = [out_layer_activation] * len(out_dim) model_tails = nn.ModuleList() if ps.is_empty(self.tail_hid_layers): for out_d, out_activ in zip(out_dim, out_layer_activation):