diff --git a/contrib/ros/envs/example_launchers/trpo_gazebo_sawyer_pnp.py b/contrib/ros/envs/example_launchers/trpo_gazebo_sawyer_pnp.py index 4cb695414..19362df5a 100644 --- a/contrib/ros/envs/example_launchers/trpo_gazebo_sawyer_pnp.py +++ b/contrib/ros/envs/example_launchers/trpo_gazebo_sawyer_pnp.py @@ -3,18 +3,17 @@ import numpy as np import rospy -from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline -from rllab.envs.normalized_env import normalize -from rllab.misc.instrument import run_experiment_lite - -from sandbox.rocky.tf.algos.trpo import TRPO -from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy -from sandbox.rocky.tf.envs.base import TfEnv - from contrib.ros.envs.example_launchers import model_dir from contrib.ros.envs.sawyer.pick_and_place_env import PickAndPlaceEnv from contrib.ros.util.task_object_manager import TaskObject, TaskObjectManager +from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline +from rllab.envs.normalized_env import normalize +from rllab.misc.instrument import run_experiment_lite +from rllab.tf.algos.trpo import TRPO +from rllab.tf.envs.base import TfEnv +from rllab.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy + def run_task(*_): block = TaskObject( diff --git a/contrib/ros/envs/example_launchers/trpo_gazebo_sawyer_push.py b/contrib/ros/envs/example_launchers/trpo_gazebo_sawyer_push.py index 7da0fef03..5e591a5e9 100644 --- a/contrib/ros/envs/example_launchers/trpo_gazebo_sawyer_push.py +++ b/contrib/ros/envs/example_launchers/trpo_gazebo_sawyer_push.py @@ -3,18 +3,17 @@ import numpy as np import rospy -from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline -from rllab.envs.normalized_env import normalize -from rllab.misc.instrument import run_experiment_lite - -from sandbox.rocky.tf.algos.trpo import TRPO -from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy -from sandbox.rocky.tf.envs.base import TfEnv - from 
contrib.ros.envs.example_launchers import model_dir from contrib.ros.envs.sawyer.push_env import PushEnv from contrib.ros.util.task_object_manager import TaskObject, TaskObjectManager +from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline +from rllab.envs.normalized_env import normalize +from rllab.misc.instrument import run_experiment_lite +from rllab.tf.algos.trpo import TRPO +from rllab.tf.envs.base import TfEnv +from rllab.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy + def run_task(*_): table = TaskObject( @@ -75,4 +74,4 @@ def run_task(*_): run_task, n_parallel=1, plot=False, -) \ No newline at end of file +) diff --git a/examples/cluster_gym_mujoco_demo.py b/examples/cluster_gym_mujoco_demo.py index 3e9cace72..528685375 100644 --- a/examples/cluster_gym_mujoco_demo.py +++ b/examples/cluster_gym_mujoco_demo.py @@ -1,13 +1,13 @@ +import sys + from rllab.baselines import LinearFeatureBaseline +from rllab.envs import GymEnv from rllab.envs import normalize -from sandbox.rocky.tf.envs import TfEnv -from sandbox.rocky.tf.policies import GaussianMLPPolicy -from sandbox.rocky.tf.algos import TRPO from rllab.misc import run_experiment_lite -from rllab.envs import GymEnv -import sys - from rllab.misc import VariantGenerator, variant +from rllab.tf.algos import TRPO +from rllab.tf.envs import TfEnv +from rllab.tf.policies import GaussianMLPPolicy class VG(VariantGenerator): diff --git a/examples/trpo_gym_tf_cartpole.py b/examples/trpo_gym_tf_cartpole.py index d440d688d..ce9edf2ac 100644 --- a/examples/trpo_gym_tf_cartpole.py +++ b/examples/trpo_gym_tf_cartpole.py @@ -2,10 +2,9 @@ from rllab.envs import GymEnv from rllab.envs import normalize from rllab.misc import stub, run_experiment_lite - -from sandbox.rocky.tf.envs import TfEnv -from sandbox.rocky.tf.policies import CategoricalMLPPolicy -from sandbox.rocky.tf.algos import TRPO +from rllab.tf.algos import TRPO +from rllab.tf.envs import TfEnv +from rllab.tf.policies import 
CategoricalMLPPolicy stub(globals()) diff --git a/sandbox/__init__.py b/rllab/tf/__init__.py similarity index 100% rename from sandbox/__init__.py rename to rllab/tf/__init__.py diff --git a/rllab/tf/algos/__init__.py b/rllab/tf/algos/__init__.py new file mode 100644 index 000000000..c3b7d7a95 --- /dev/null +++ b/rllab/tf/algos/__init__.py @@ -0,0 +1,4 @@ +from rllab.tf.algos.batch_polopt import BatchPolopt +from rllab.tf.algos.npo import NPO +from rllab.tf.algos.trpo import TRPO +from rllab.tf.algos.vpg import VPG diff --git a/sandbox/rocky/tf/algos/batch_polopt.py b/rllab/tf/algos/batch_polopt.py similarity index 97% rename from sandbox/rocky/tf/algos/batch_polopt.py rename to rllab/tf/algos/batch_polopt.py index c945c176a..83fc0ece4 100644 --- a/sandbox/rocky/tf/algos/batch_polopt.py +++ b/rllab/tf/algos/batch_polopt.py @@ -1,11 +1,13 @@ import time + +import tensorflow as tf + from rllab.algos import RLAlgorithm import rllab.misc.logger as logger -from sandbox.rocky.tf.policies.base import Policy -import tensorflow as tf -from sandbox.rocky.tf.samplers import BatchSampler -from sandbox.rocky.tf.samplers import VectorizedSampler from rllab.sampler.utils import rollout +from rllab.tf.policies.base import Policy +from rllab.tf.samplers import BatchSampler +from rllab.tf.samplers import VectorizedSampler class BatchPolopt(RLAlgorithm): @@ -103,7 +105,7 @@ def train(self, sess=None): if sess is None: sess = tf.Session() sess.__enter__() - + sess.run(tf.global_variables_initializer()) self.start_worker() start_time = time.time() diff --git a/sandbox/rocky/tf/algos/npg.py b/rllab/tf/algos/npg.py similarity index 100% rename from sandbox/rocky/tf/algos/npg.py rename to rllab/tf/algos/npg.py diff --git a/sandbox/rocky/tf/algos/npo.py b/rllab/tf/algos/npo.py similarity index 95% rename from sandbox/rocky/tf/algos/npo.py rename to rllab/tf/algos/npo.py index 9ba30ef02..f6422f060 100644 --- a/sandbox/rocky/tf/algos/npo.py +++ b/rllab/tf/algos/npo.py @@ -1,14 +1,13 @@ - 
+import tensorflow as tf from rllab.misc import ext from rllab.misc.overrides import overrides import rllab.misc.logger as logger -from sandbox.rocky.tf.optimizers import PenaltyLbfgsOptimizer -from sandbox.rocky.tf.algos import BatchPolopt -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -import tensorflow as tf +from rllab.tf.algos import BatchPolopt +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.optimizers import PenaltyLbfgsOptimizer class NPO(BatchPolopt): diff --git a/sandbox/rocky/tf/algos/trpo.py b/rllab/tf/algos/trpo.py similarity index 80% rename from sandbox/rocky/tf/algos/trpo.py rename to rllab/tf/algos/trpo.py index 5c3222efd..7e57dc1da 100644 --- a/sandbox/rocky/tf/algos/trpo.py +++ b/rllab/tf/algos/trpo.py @@ -1,7 +1,5 @@ - - -from sandbox.rocky.tf.algos import NPO -from sandbox.rocky.tf.optimizers import ConjugateGradientOptimizer +from rllab.tf.algos import NPO +from rllab.tf.optimizers import ConjugateGradientOptimizer class TRPO(NPO): diff --git a/sandbox/rocky/tf/algos/vpg.py b/rllab/tf/algos/vpg.py similarity index 95% rename from sandbox/rocky/tf/algos/vpg.py rename to rllab/tf/algos/vpg.py index 3e74edbc5..df0062122 100644 --- a/sandbox/rocky/tf/algos/vpg.py +++ b/rllab/tf/algos/vpg.py @@ -1,14 +1,13 @@ +import tensorflow as tf - +from rllab.core import Serializable from rllab.misc import logger from rllab.misc import ext from rllab.misc.overrides import overrides -from sandbox.rocky.tf.algos import BatchPolopt -from sandbox.rocky.tf.optimizers import FirstOrderOptimizer -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -from rllab.core import Serializable -import tensorflow as tf +from rllab.tf.algos import BatchPolopt +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.optimizers import 
FirstOrderOptimizer class VPG(BatchPolopt, Serializable): diff --git a/rllab/tf/core/__init__.py b/rllab/tf/core/__init__.py new file mode 100644 index 000000000..6ed36fdcd --- /dev/null +++ b/rllab/tf/core/__init__.py @@ -0,0 +1,10 @@ +# Import order matters: layers_powered and network import names from this +# package while it is still initializing, so parameterized must come first. +from rllab.tf.core import layers +from rllab.tf.core.parameterized import JointParameterized +from rllab.tf.core.parameterized import Parameterized +from rllab.tf.core.layers_powered import LayersPowered +from rllab.tf.core.network import MLP +from rllab.tf.core.network import GRUNetwork +from rllab.tf.core.network import ConvNetwork +from rllab.tf.core.network import LSTMNetwork diff --git a/sandbox/rocky/tf/core/layers.py b/rllab/tf/core/layers.py similarity index 100% rename from sandbox/rocky/tf/core/layers.py rename to rllab/tf/core/layers.py diff --git a/sandbox/rocky/tf/core/layers_powered.py b/rllab/tf/core/layers_powered.py similarity index 83% rename from sandbox/rocky/tf/core/layers_powered.py rename to rllab/tf/core/layers_powered.py index 164e34c5f..bfcea9f01 100644 --- a/sandbox/rocky/tf/core/layers_powered.py +++ b/rllab/tf/core/layers_powered.py @@ -1,7 +1,8 @@ -from sandbox.rocky.tf.core import Parameterized -from sandbox.rocky.tf.core import layers as L import itertools +from rllab.tf.core import Parameterized +from rllab.tf.core import layers as L + class LayersPowered(Parameterized): diff --git a/sandbox/rocky/tf/core/network.py b/rllab/tf/core/network.py similarity index 99% rename from sandbox/rocky/tf/core/network.py rename to rllab/tf/core/network.py index 55aeca9e7..9839ea5ae 100644 --- a/sandbox/rocky/tf/core/network.py +++ b/rllab/tf/core/network.py @@ -1,10 +1,12 @@ -from sandbox.rocky.tf.core import layers as L -import tensorflow as tf -import numpy as np import itertools + +import numpy as np +import tensorflow as tf + from rllab.core import Serializable -from sandbox.rocky.tf.core import Parameterized -from sandbox.rocky.tf.core import LayersPowered +from rllab.tf.core import layers as L +from 
rllab.tf.core import Parameterized +from rllab.tf.core import LayersPowered class MLP(LayersPowered, Serializable): diff --git a/sandbox/rocky/tf/core/parameterized.py b/rllab/tf/core/parameterized.py similarity index 100% rename from sandbox/rocky/tf/core/parameterized.py rename to rllab/tf/core/parameterized.py diff --git a/rllab/tf/distributions/__init__.py b/rllab/tf/distributions/__init__.py new file mode 100644 index 000000000..296964eaf --- /dev/null +++ b/rllab/tf/distributions/__init__.py @@ -0,0 +1,6 @@ +from rllab.tf.distributions.base import Distribution +from rllab.tf.distributions.bernoulli import Bernoulli +from rllab.tf.distributions.categorical import Categorical +from rllab.tf.distributions.diagonal_gaussian import DiagonalGaussian +from rllab.tf.distributions.recurrent_categorical import RecurrentCategorical +from rllab.tf.distributions.recurrent_diagonal_gaussian import RecurrentDiagonalGaussian diff --git a/sandbox/rocky/tf/distributions/base.py b/rllab/tf/distributions/base.py similarity index 100% rename from sandbox/rocky/tf/distributions/base.py rename to rllab/tf/distributions/base.py diff --git a/sandbox/rocky/tf/distributions/bernoulli.py b/rllab/tf/distributions/bernoulli.py similarity index 95% rename from sandbox/rocky/tf/distributions/bernoulli.py rename to rllab/tf/distributions/bernoulli.py index c86c107b4..790006c0a 100644 --- a/sandbox/rocky/tf/distributions/bernoulli.py +++ b/rllab/tf/distributions/bernoulli.py @@ -1,9 +1,8 @@ - - -from .base import Distribution -import tensorflow as tf import numpy as np -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope +import tensorflow as tf + +from rllab.tf.distributions.base import Distribution +from rllab.tf.misc.tensor_utils import enclosing_scope TINY = 1e-8 diff --git a/sandbox/rocky/tf/distributions/categorical.py b/rllab/tf/distributions/categorical.py similarity index 97% rename from sandbox/rocky/tf/distributions/categorical.py rename to 
rllab/tf/distributions/categorical.py index ac53552cf..8a05722c4 100644 --- a/sandbox/rocky/tf/distributions/categorical.py +++ b/rllab/tf/distributions/categorical.py @@ -1,7 +1,8 @@ import numpy as np -from .base import Distribution import tensorflow as tf -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope + +from rllab.tf.distributions.base import Distribution +from rllab.tf.misc.tensor_utils import enclosing_scope TINY = 1e-8 diff --git a/sandbox/rocky/tf/distributions/diagonal_gaussian.py b/rllab/tf/distributions/diagonal_gaussian.py similarity index 97% rename from sandbox/rocky/tf/distributions/diagonal_gaussian.py rename to rllab/tf/distributions/diagonal_gaussian.py index 1e0f43ad5..320628c7f 100644 --- a/sandbox/rocky/tf/distributions/diagonal_gaussian.py +++ b/rllab/tf/distributions/diagonal_gaussian.py @@ -1,10 +1,8 @@ - - - -import tensorflow as tf import numpy as np -from sandbox.rocky.tf.distributions import Distribution -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope +import tensorflow as tf + +from rllab.tf.distributions import Distribution +from rllab.tf.misc.tensor_utils import enclosing_scope class DiagonalGaussian(Distribution): diff --git a/sandbox/rocky/tf/distributions/recurrent_categorical.py b/rllab/tf/distributions/recurrent_categorical.py similarity index 94% rename from sandbox/rocky/tf/distributions/recurrent_categorical.py rename to rllab/tf/distributions/recurrent_categorical.py index 557685b2a..7e139dfa9 100644 --- a/sandbox/rocky/tf/distributions/recurrent_categorical.py +++ b/rllab/tf/distributions/recurrent_categorical.py @@ -1,8 +1,9 @@ -import tensorflow as tf import numpy as np -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -from sandbox.rocky.tf.distributions import Categorical -from sandbox.rocky.tf.distributions import Distribution +import tensorflow as tf + +from rllab.tf.distributions import Categorical +from rllab.tf.distributions import Distribution +from 
rllab.tf.misc.tensor_utils import enclosing_scope TINY = 1e-8 diff --git a/rllab/tf/distributions/recurrent_diagonal_gaussian.py b/rllab/tf/distributions/recurrent_diagonal_gaussian.py new file mode 100644 index 000000000..5e21bdefd --- /dev/null +++ b/rllab/tf/distributions/recurrent_diagonal_gaussian.py @@ -0,0 +1,3 @@ +from rllab.tf.distributions import DiagonalGaussian + +RecurrentDiagonalGaussian = DiagonalGaussian diff --git a/rllab/tf/envs/__init__.py b/rllab/tf/envs/__init__.py new file mode 100644 index 000000000..4bbed2738 --- /dev/null +++ b/rllab/tf/envs/__init__.py @@ -0,0 +1,4 @@ +from rllab.tf.envs.base import TfEnv +from rllab.tf.envs.base import to_tf_space +from rllab.tf.envs.parallel_vec_env_executor import ParallelVecEnvExecutor +from rllab.tf.envs.vec_env_executor import VecEnvExecutor diff --git a/sandbox/rocky/tf/envs/base.py b/rllab/tf/envs/base.py similarity index 94% rename from sandbox/rocky/tf/envs/base.py rename to rllab/tf/envs/base.py index 1b77f868e..45e402303 100644 --- a/sandbox/rocky/tf/envs/base.py +++ b/rllab/tf/envs/base.py @@ -3,9 +3,9 @@ from rllab.spaces import Box as TheanoBox from rllab.spaces import Discrete as TheanoDiscrete from rllab.spaces import Product as TheanoProduct -from sandbox.rocky.tf.spaces import Discrete -from sandbox.rocky.tf.spaces import Box -from sandbox.rocky.tf.spaces import Product +from rllab.tf.spaces import Discrete +from rllab.tf.spaces import Box +from rllab.tf.spaces import Product from cached_property import cached_property diff --git a/sandbox/rocky/tf/envs/parallel_vec_env_executor.py b/rllab/tf/envs/parallel_vec_env_executor.py similarity index 99% rename from sandbox/rocky/tf/envs/parallel_vec_env_executor.py rename to rllab/tf/envs/parallel_vec_env_executor.py index 3ab0267ec..db79eaf39 100644 --- a/sandbox/rocky/tf/envs/parallel_vec_env_executor.py +++ b/rllab/tf/envs/parallel_vec_env_executor.py @@ -1,12 +1,11 @@ - +import uuid import numpy as np import pickle as pickle -from 
sandbox.rocky.tf.misc import tensor_utils -from rllab.misc import logger +from rllab.misc import logger from rllab.sampler import singleton_pool -import uuid +from rllab.tf.misc import tensor_utils def worker_init_envs(G, alloc, scope, env): diff --git a/sandbox/rocky/tf/envs/vec_env_executor.py b/rllab/tf/envs/vec_env_executor.py similarity index 96% rename from sandbox/rocky/tf/envs/vec_env_executor.py rename to rllab/tf/envs/vec_env_executor.py index af132731d..1f462b03d 100644 --- a/sandbox/rocky/tf/envs/vec_env_executor.py +++ b/rllab/tf/envs/vec_env_executor.py @@ -2,7 +2,7 @@ import numpy as np import pickle as pickle -from sandbox.rocky.tf.misc import tensor_utils +from rllab.tf.misc import tensor_utils class VecEnvExecutor(object): diff --git a/sandbox/rocky/tf/launchers/__init__.py b/rllab/tf/launchers/__init__.py similarity index 100% rename from sandbox/rocky/tf/launchers/__init__.py rename to rllab/tf/launchers/__init__.py diff --git a/sandbox/rocky/tf/launchers/trpo_cartpole.py b/rllab/tf/launchers/trpo_cartpole.py similarity index 74% rename from sandbox/rocky/tf/launchers/trpo_cartpole.py rename to rllab/tf/launchers/trpo_cartpole.py index 41402e735..18ada7bc7 100644 --- a/sandbox/rocky/tf/launchers/trpo_cartpole.py +++ b/rllab/tf/launchers/trpo_cartpole.py @@ -1,12 +1,12 @@ -from sandbox.rocky.tf.algos import TRPO from rllab.baselines import LinearFeatureBaseline from rllab.envs.box2d import CartpoleEnv from rllab.envs.normalized_env import normalize -from sandbox.rocky.tf.optimizers import ConjugateGradientOptimizer -from sandbox.rocky.tf.optimizers import FiniteDifferenceHvp -from sandbox.rocky.tf.policies import GaussianMLPPolicy -from sandbox.rocky.tf.envs import TfEnv from rllab.misc import stub, run_experiment_lite +from rllab.tf.algos import TRPO +from rllab.tf.envs import TfEnv +from rllab.tf.optimizers import ConjugateGradientOptimizer +from rllab.tf.optimizers import FiniteDifferenceHvp +from rllab.tf.policies import GaussianMLPPolicy env 
= TfEnv(normalize(CartpoleEnv())) diff --git a/sandbox/rocky/tf/launchers/trpo_cartpole_recurrent.py b/rllab/tf/launchers/trpo_cartpole_recurrent.py similarity index 67% rename from sandbox/rocky/tf/launchers/trpo_cartpole_recurrent.py rename to rllab/tf/launchers/trpo_cartpole_recurrent.py index b0523c181..8e77424c2 100644 --- a/sandbox/rocky/tf/launchers/trpo_cartpole_recurrent.py +++ b/rllab/tf/launchers/trpo_cartpole_recurrent.py @@ -1,13 +1,14 @@ -from sandbox.rocky.tf.algos import TRPO from rllab.baselines import LinearFeatureBaseline from rllab.envs.box2d import CartpoleEnv from rllab.envs import normalize -from sandbox.rocky.tf.policies import GaussianGRUPolicy -from sandbox.rocky.tf.policies import GaussianLSTMPolicy -from sandbox.rocky.tf.envs import TfEnv -import sandbox.rocky.tf.core.layers as L -from sandbox.rocky.tf.optimizers import ConjugateGradientOptimizer, FiniteDifferenceHvp from rllab.misc import stub, run_experiment_lite +from rllab.tf.algos import TRPO +import rllab.tf.core.layers as L +from rllab.tf.envs import TfEnv +from rllab.tf.optimizers import ConjugateGradientOptimizer, FiniteDifferenceHvp +from rllab.tf.policies import GaussianGRUPolicy +from rllab.tf.policies import GaussianLSTMPolicy + env = TfEnv(normalize(CartpoleEnv())) diff --git a/sandbox/rocky/tf/launchers/vpg_cartpole.py b/rllab/tf/launchers/vpg_cartpole.py similarity index 84% rename from sandbox/rocky/tf/launchers/vpg_cartpole.py rename to rllab/tf/launchers/vpg_cartpole.py index 1343c995f..f69e9f4e1 100644 --- a/sandbox/rocky/tf/launchers/vpg_cartpole.py +++ b/rllab/tf/launchers/vpg_cartpole.py @@ -1,10 +1,10 @@ -from sandbox.rocky.tf.algos import VPG from rllab.baselines import LinearFeatureBaseline -from rllab.envs.box2d import CartpoleEnv from rllab.envs import normalize -from sandbox.rocky.tf.policies import GaussianMLPPolicy -from sandbox.rocky.tf.envs import TfEnv +from rllab.envs.box2d import CartpoleEnv from rllab.misc import stub, run_experiment_lite +from 
rllab.tf.algos import VPG +from rllab.tf.envs import TfEnv +from rllab.tf.policies import GaussianMLPPolicy env = TfEnv(normalize(CartpoleEnv())) diff --git a/sandbox/rocky/tf/misc/__init__.py b/rllab/tf/misc/__init__.py similarity index 100% rename from sandbox/rocky/tf/misc/__init__.py rename to rllab/tf/misc/__init__.py diff --git a/sandbox/rocky/tf/misc/tensor_utils.py b/rllab/tf/misc/tensor_utils.py similarity index 100% rename from sandbox/rocky/tf/misc/tensor_utils.py rename to rllab/tf/misc/tensor_utils.py index 0441f4c53..b43a8d2f7 100644 --- a/sandbox/rocky/tf/misc/tensor_utils.py +++ b/rllab/tf/misc/tensor_utils.py @@ -1,5 +1,5 @@ -import tensorflow as tf import numpy as np +import tensorflow as tf def compile_function(inputs, outputs, log_name=None): diff --git a/rllab/tf/optimizers/__init__.py b/rllab/tf/optimizers/__init__.py new file mode 100644 index 000000000..7c0b74c48 --- /dev/null +++ b/rllab/tf/optimizers/__init__.py @@ -0,0 +1,5 @@ +from rllab.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer +from rllab.tf.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp +from rllab.tf.optimizers.first_order_optimizer import FirstOrderOptimizer +from rllab.tf.optimizers.lbfgs_optimizer import LbfgsOptimizer +from rllab.tf.optimizers.penalty_lbfgs_optimizer import PenaltyLbfgsOptimizer diff --git a/sandbox/rocky/tf/optimizers/conjugate_gradient_optimizer.py b/rllab/tf/optimizers/conjugate_gradient_optimizer.py similarity index 98% rename from sandbox/rocky/tf/optimizers/conjugate_gradient_optimizer.py rename to rllab/tf/optimizers/conjugate_gradient_optimizer.py index 5948f9b35..534effb63 100644 --- a/sandbox/rocky/tf/optimizers/conjugate_gradient_optimizer.py +++ b/rllab/tf/optimizers/conjugate_gradient_optimizer.py @@ -1,14 +1,15 @@ -from rllab.misc import ext -from rllab.misc import krylov -from rllab.misc import logger -from rllab.core import Serializable -# from rllab.misc import flatten_tensor_variables import 
itertools + import numpy as np import tensorflow as tf -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope + +from rllab.core import Serializable +from rllab.misc import ext +from rllab.misc import krylov +from rllab.misc import logger from rllab.misc import sliced_fun +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope class PerlmutterHvp(object): diff --git a/sandbox/rocky/tf/optimizers/first_order_optimizer.py b/rllab/tf/optimizers/first_order_optimizer.py similarity index 95% rename from sandbox/rocky/tf/optimizers/first_order_optimizer.py rename to rllab/tf/optimizers/first_order_optimizer.py index a931f84f7..b95fc99d6 100644 --- a/sandbox/rocky/tf/optimizers/first_order_optimizer.py +++ b/rllab/tf/optimizers/first_order_optimizer.py @@ -7,9 +7,8 @@ from rllab.misc import ext from rllab.misc import logger from rllab.optimizers import BatchDataset -from sandbox.rocky.tf.misc import tensor_utils -# from rllab.algo.first_order_method import parse_update_method -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope class FirstOrderOptimizer(Serializable): @@ -146,4 +145,4 @@ def optimize(self, inputs, extra_inputs=None, callback=None): if abs(last_loss - new_loss) < self._tolerance: break - last_loss = new_loss \ No newline at end of file + last_loss = new_loss diff --git a/sandbox/rocky/tf/optimizers/lbfgs_optimizer.py b/rllab/tf/optimizers/lbfgs_optimizer.py similarity index 96% rename from sandbox/rocky/tf/optimizers/lbfgs_optimizer.py rename to rllab/tf/optimizers/lbfgs_optimizer.py index 64e3c1493..29cba76bc 100644 --- a/sandbox/rocky/tf/optimizers/lbfgs_optimizer.py +++ b/rllab/tf/optimizers/lbfgs_optimizer.py @@ -5,8 +5,8 @@ from rllab.core import Serializable from rllab.misc import ext -from sandbox.rocky.tf.misc import tensor_utils -from 
sandbox.rocky.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope class LbfgsOptimizer(Serializable): @@ -100,4 +100,4 @@ def opt_callback(params): x0=self._target.get_param_values(trainable=True), maxiter=self._max_opt_itr, callback=opt_callback, - ) \ No newline at end of file + ) diff --git a/sandbox/rocky/tf/optimizers/penalty_lbfgs_optimizer.py b/rllab/tf/optimizers/penalty_lbfgs_optimizer.py similarity index 98% rename from sandbox/rocky/tf/optimizers/penalty_lbfgs_optimizer.py rename to rllab/tf/optimizers/penalty_lbfgs_optimizer.py index 33a26eca2..d0c286b5d 100644 --- a/sandbox/rocky/tf/optimizers/penalty_lbfgs_optimizer.py +++ b/rllab/tf/optimizers/penalty_lbfgs_optimizer.py @@ -1,11 +1,13 @@ -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -from rllab.misc import logger -from rllab.misc import ext -from rllab.core import Serializable -import tensorflow as tf import numpy as np import scipy.optimize +import tensorflow as tf + +from rllab.core import Serializable +from rllab.misc import logger +from rllab.misc import ext +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope + class PenaltyLbfgsOptimizer(Serializable): diff --git a/rllab/tf/policies/__init__.py b/rllab/tf/policies/__init__.py new file mode 100644 index 000000000..aaff72cc3 --- /dev/null +++ b/rllab/tf/policies/__init__.py @@ -0,0 +1,6 @@ +from rllab.tf.policies.base import Policy +from rllab.tf.policies.base import StochasticPolicy +from rllab.tf.policies.categorical_mlp_policy import CategoricalMLPPolicy +from rllab.tf.policies.gaussian_gru_policy import GaussianGRUPolicy +from rllab.tf.policies.gaussian_lstm_policy import GaussianLSTMPolicy +from rllab.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy diff --git a/sandbox/rocky/tf/policies/base.py b/rllab/tf/policies/base.py similarity index 
98% rename from sandbox/rocky/tf/policies/base.py rename to rllab/tf/policies/base.py index a1742dfb9..a3eb9944f 100644 --- a/sandbox/rocky/tf/policies/base.py +++ b/rllab/tf/policies/base.py @@ -1,7 +1,4 @@ - - - -from sandbox.rocky.tf.core import Parameterized +from rllab.tf.core import Parameterized class Policy(Parameterized): diff --git a/sandbox/rocky/tf/policies/categorical_conv_policy.py b/rllab/tf/policies/categorical_conv_policy.py similarity index 89% rename from sandbox/rocky/tf/policies/categorical_conv_policy.py rename to rllab/tf/policies/categorical_conv_policy.py index b674caab7..6896a73a2 100644 --- a/sandbox/rocky/tf/policies/categorical_conv_policy.py +++ b/rllab/tf/policies/categorical_conv_policy.py @@ -1,15 +1,15 @@ import tensorflow as tf -import sandbox.rocky.tf.core.layers as L from rllab.core import Serializable from rllab.misc.overrides import overrides -from sandbox.rocky.tf.core import ConvNetwork -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.distributions import Categorical -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -from sandbox.rocky.tf.policies import StochasticPolicy -from sandbox.rocky.tf.spaces import Discrete +from rllab.tf.core import ConvNetwork +from rllab.tf.core import LayersPowered +import rllab.tf.core.layers as L +from rllab.tf.distributions import Categorical +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.policies import StochasticPolicy +from rllab.tf.spaces import Discrete class CategoricalConvPolicy(StochasticPolicy, LayersPowered, Serializable): @@ -102,4 +102,4 @@ def get_actions(self, observations): @property def distribution(self): - return self._dist \ No newline at end of file + return self._dist diff --git a/sandbox/rocky/tf/policies/categorical_gru_policy.py b/rllab/tf/policies/categorical_gru_policy.py similarity index 94% rename from 
sandbox/rocky/tf/policies/categorical_gru_policy.py rename to rllab/tf/policies/categorical_gru_policy.py index 9985e5b19..ed53ac0eb 100644 --- a/sandbox/rocky/tf/policies/categorical_gru_policy.py +++ b/rllab/tf/policies/categorical_gru_policy.py @@ -1,17 +1,17 @@ import numpy as np -import sandbox.rocky.tf.core.layers as L import tensorflow as tf -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import GRUNetwork, MLP -from sandbox.rocky.tf.distributions import RecurrentCategorical -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -from sandbox.rocky.tf.spaces import Discrete -from sandbox.rocky.tf.policies import StochasticPolicy from rllab.core import Serializable from rllab.misc import special from rllab.misc.overrides import overrides +from rllab.tf.core import LayersPowered +from rllab.tf.core import GRUNetwork, MLP +import rllab.tf.core.layers as L +from rllab.tf.distributions import RecurrentCategorical +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.policies import StochasticPolicy +from rllab.tf.spaces import Discrete class CategoricalGRUPolicy(StochasticPolicy, LayersPowered, Serializable): diff --git a/sandbox/rocky/tf/policies/categorical_lstm_policy.py b/rllab/tf/policies/categorical_lstm_policy.py similarity index 95% rename from sandbox/rocky/tf/policies/categorical_lstm_policy.py rename to rllab/tf/policies/categorical_lstm_policy.py index a97ec3fcf..5abd28dfd 100644 --- a/sandbox/rocky/tf/policies/categorical_lstm_policy.py +++ b/rllab/tf/policies/categorical_lstm_policy.py @@ -1,17 +1,17 @@ import numpy as np -import sandbox.rocky.tf.core.layers as L import tensorflow as tf -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import LSTMNetwork, MLP -from sandbox.rocky.tf.distributions import RecurrentCategorical -from sandbox.rocky.tf.misc import tensor_utils -from 
sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -from sandbox.rocky.tf.spaces import Discrete -from sandbox.rocky.tf.policies import StochasticPolicy from rllab.core import Serializable from rllab.misc import special from rllab.misc.overrides import overrides +from rllab.tf.core import LayersPowered +from rllab.tf.core import LSTMNetwork, MLP +import rllab.tf.core.layers as L +from rllab.tf.distributions import RecurrentCategorical +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.policies import StochasticPolicy +from rllab.tf.spaces import Discrete class CategoricalLSTMPolicy(StochasticPolicy, LayersPowered, Serializable): diff --git a/sandbox/rocky/tf/policies/categorical_mlp_policy.py b/rllab/tf/policies/categorical_mlp_policy.py similarity index 90% rename from sandbox/rocky/tf/policies/categorical_mlp_policy.py rename to rllab/tf/policies/categorical_mlp_policy.py index fbceeb6b9..2fbc286ea 100644 --- a/sandbox/rocky/tf/policies/categorical_mlp_policy.py +++ b/rllab/tf/policies/categorical_mlp_policy.py @@ -1,14 +1,15 @@ -from sandbox.rocky.tf.core import LayersPowered -import sandbox.rocky.tf.core.layers as L -from sandbox.rocky.tf.core import MLP +import tensorflow as tf + from rllab.core import Serializable -from sandbox.rocky.tf.distributions import Categorical -from sandbox.rocky.tf.policies import StochasticPolicy from rllab.misc import ext -from sandbox.rocky.tf.misc import tensor_utils from rllab.misc.overrides import overrides -from sandbox.rocky.tf.spaces import Discrete -import tensorflow as tf +from rllab.tf.core import LayersPowered +import rllab.tf.core.layers as L +from rllab.tf.core import MLP +from rllab.tf.distributions import Categorical +from rllab.tf.policies import StochasticPolicy +from rllab.tf.misc import tensor_utils +from rllab.tf.spaces import Discrete class CategoricalMLPPolicy(StochasticPolicy, LayersPowered, Serializable): diff --git 
a/sandbox/rocky/tf/policies/deterministic_mlp_policy.py b/rllab/tf/policies/deterministic_mlp_policy.py similarity index 86% rename from sandbox/rocky/tf/policies/deterministic_mlp_policy.py rename to rllab/tf/policies/deterministic_mlp_policy.py index ba6a23159..e959eb15b 100644 --- a/sandbox/rocky/tf/policies/deterministic_mlp_policy.py +++ b/rllab/tf/policies/deterministic_mlp_policy.py @@ -1,17 +1,18 @@ +import tensorflow as tf + from rllab.core import Serializable from rllab.misc import ext from rllab.misc.overrides import overrides -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import MLP -from sandbox.rocky.tf.distributions import Categorical -from sandbox.rocky.tf.policies import Policy -from sandbox.rocky.tf.misc import tensor_utils +from rllab.tf.core import LayersPowered +from rllab.tf.core import MLP +import rllab.tf.core.layers as L +from rllab.tf.core.layers import batch_norm +from rllab.tf.distributions import Categorical +from rllab.tf.policies import Policy +from rllab.tf.misc import tensor_utils -import sandbox.rocky.tf.core.layers as L -from sandbox.rocky.tf.core.layers import batch_norm -from sandbox.rocky.tf.spaces import Discrete -import tensorflow as tf +from rllab.tf.spaces import Discrete class DeterministicMLPPolicy(Policy, LayersPowered, Serializable): @@ -57,7 +58,7 @@ def __init__( @property def vectorized(self): return True - + @overrides def get_action(self, observation): flat_obs = self.observation_space.flatten(observation) diff --git a/sandbox/rocky/tf/policies/gaussian_gru_policy.py b/rllab/tf/policies/gaussian_gru_policy.py similarity index 96% rename from sandbox/rocky/tf/policies/gaussian_gru_policy.py rename to rllab/tf/policies/gaussian_gru_policy.py index af7526791..452332090 100644 --- a/sandbox/rocky/tf/policies/gaussian_gru_policy.py +++ b/rllab/tf/policies/gaussian_gru_policy.py @@ -1,16 +1,16 @@ import numpy as np -import sandbox.rocky.tf.core.layers as L import tensorflow as tf -from 
sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import GRUNetwork -from sandbox.rocky.tf.distributions import RecurrentDiagonalGaussian -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -from sandbox.rocky.tf.policies import StochasticPolicy from rllab.core import Serializable from rllab.misc.overrides import overrides from rllab.misc import logger +from rllab.tf.core import LayersPowered +from rllab.tf.core import GRUNetwork +import rllab.tf.core.layers as L +from rllab.tf.distributions import RecurrentDiagonalGaussian +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.policies import StochasticPolicy class GaussianGRUPolicy(StochasticPolicy, LayersPowered, Serializable): diff --git a/sandbox/rocky/tf/policies/gaussian_lstm_policy.py b/rllab/tf/policies/gaussian_lstm_policy.py similarity index 96% rename from sandbox/rocky/tf/policies/gaussian_lstm_policy.py rename to rllab/tf/policies/gaussian_lstm_policy.py index 4760f89ee..f087ec02c 100644 --- a/sandbox/rocky/tf/policies/gaussian_lstm_policy.py +++ b/rllab/tf/policies/gaussian_lstm_policy.py @@ -1,15 +1,15 @@ import numpy as np -import sandbox.rocky.tf.core.layers as L import tensorflow as tf -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import LSTMNetwork -from sandbox.rocky.tf.distributions import RecurrentDiagonalGaussian -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.policies import StochasticPolicy -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope from rllab.core import Serializable from rllab.misc.overrides import overrides +from rllab.tf.core import LayersPowered +from rllab.tf.core import LSTMNetwork +import rllab.tf.core.layers as L +from rllab.tf.distributions import RecurrentDiagonalGaussian +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope 
+from rllab.tf.policies import StochasticPolicy class GaussianLSTMPolicy(StochasticPolicy, LayersPowered, Serializable): diff --git a/sandbox/rocky/tf/policies/gaussian_mlp_policy.py b/rllab/tf/policies/gaussian_mlp_policy.py similarity index 95% rename from sandbox/rocky/tf/policies/gaussian_mlp_policy.py rename to rllab/tf/policies/gaussian_mlp_policy.py index eec182a52..66451a2ca 100644 --- a/sandbox/rocky/tf/policies/gaussian_mlp_policy.py +++ b/rllab/tf/policies/gaussian_mlp_policy.py @@ -1,18 +1,17 @@ import numpy as np - -from sandbox.rocky.tf.core import LayersPowered -import sandbox.rocky.tf.core.layers as L -from sandbox.rocky.tf.core import MLP -from sandbox.rocky.tf.spaces import Box +import tensorflow as tf from rllab.core import Serializable -from sandbox.rocky.tf.policies import StochasticPolicy -from sandbox.rocky.tf.distributions import DiagonalGaussian -from rllab.misc.overrides import overrides from rllab.misc import logger -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope -import tensorflow as tf +from rllab.misc.overrides import overrides +from rllab.tf.core import LayersPowered +import rllab.tf.core.layers as L +from rllab.tf.core import MLP +from rllab.tf.distributions import DiagonalGaussian +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.policies import StochasticPolicy +from rllab.tf.spaces import Box class GaussianMLPPolicy(StochasticPolicy, LayersPowered, Serializable): def __init__( diff --git a/sandbox/rocky/tf/policies/uniform_control_policy.py b/rllab/tf/policies/uniform_control_policy.py similarity index 92% rename from sandbox/rocky/tf/policies/uniform_control_policy.py rename to rllab/tf/policies/uniform_control_policy.py index 700bb3b11..3398aaeb2 100644 --- a/sandbox/rocky/tf/policies/uniform_control_policy.py +++ b/rllab/tf/policies/uniform_control_policy.py @@ -1,5 +1,5 @@ -from sandbox.rocky.tf.policies 
import Policy from rllab.core import Serializable +from rllab.tf.policies import Policy class UniformControlPolicy(Policy, Serializable): diff --git a/rllab/tf/q_functions/__init__.py b/rllab/tf/q_functions/__init__.py new file mode 100644 index 000000000..12f73d7d1 --- /dev/null +++ b/rllab/tf/q_functions/__init__.py @@ -0,0 +1 @@ +from rllab.tf.q_functions.base import QFunction diff --git a/rllab/tf/q_functions/base.py b/rllab/tf/q_functions/base.py new file mode 100644 index 000000000..3c255235c --- /dev/null +++ b/rllab/tf/q_functions/base.py @@ -0,0 +1,4 @@ +from rllab.tf.core import Parameterized + +class QFunction(Parameterized): + pass diff --git a/sandbox/rocky/tf/q_functions/continuous_mlp_q_function.py b/rllab/tf/q_functions/continuous_mlp_q_function.py similarity index 84% rename from sandbox/rocky/tf/q_functions/continuous_mlp_q_function.py rename to rllab/tf/q_functions/continuous_mlp_q_function.py index 731af79ed..8739b59e0 100644 --- a/sandbox/rocky/tf/q_functions/continuous_mlp_q_function.py +++ b/rllab/tf/q_functions/continuous_mlp_q_function.py @@ -1,17 +1,16 @@ -from sandbox.rocky.tf.q_functions import QFunction from rllab.core import Serializable from rllab.misc import ext - -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import MLP -from sandbox.rocky.tf.core.layers import batch_norm -from sandbox.rocky.tf.distributions import Categorical -from sandbox.rocky.tf.policies import StochasticPolicy -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.misc.tensor_utils import enclosing_scope +from rllab.tf.core import LayersPowered +from rllab.tf.core import MLP +from rllab.tf.core.layers import batch_norm +from rllab.tf.distributions import Categorical +from rllab.tf.policies import StochasticPolicy +from rllab.tf.q_functions import QFunction +from rllab.tf.misc import tensor_utils +from rllab.tf.misc.tensor_utils import enclosing_scope import tensorflow as tf -import sandbox.rocky.tf.core.layers as L 
+import rllab.tf.core.layers as L class ContinuousMLPQFunction(QFunction, LayersPowered, Serializable): diff --git a/sandbox/rocky/tf/regressors/__init__.py b/rllab/tf/regressors/__init__.py similarity index 100% rename from sandbox/rocky/tf/regressors/__init__.py rename to rllab/tf/regressors/__init__.py diff --git a/sandbox/rocky/tf/regressors/bernoulli_mlp_regressor.py b/rllab/tf/regressors/bernoulli_mlp_regressor.py similarity index 94% rename from sandbox/rocky/tf/regressors/bernoulli_mlp_regressor.py rename to rllab/tf/regressors/bernoulli_mlp_regressor.py index 7cd6bf64f..634f5c9f3 100644 --- a/sandbox/rocky/tf/regressors/bernoulli_mlp_regressor.py +++ b/rllab/tf/regressors/bernoulli_mlp_regressor.py @@ -1,18 +1,15 @@ - - - -import sandbox.rocky.tf.core.layers as L import numpy as np import tensorflow as tf -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import MLP from rllab.core import Serializable -from sandbox.rocky.tf.distributions import Bernoulli -from sandbox.rocky.tf.misc import tensor_utils +from rllab.tf.core import LayersPowered +from rllab.tf.core import MLP +import rllab.tf.core.layers as L +from rllab.tf.distributions import Bernoulli +from rllab.tf.misc import tensor_utils from rllab.misc import logger -from sandbox.rocky.tf.optimizers import ConjugateGradientOptimizer -from sandbox.rocky.tf.optimizers import LbfgsOptimizer +from rllab.tf.optimizers import ConjugateGradientOptimizer +from rllab.tf.optimizers import LbfgsOptimizer class BernoulliMLPRegressor(LayersPowered, Serializable): diff --git a/sandbox/rocky/tf/regressors/categorical_mlp_regressor.py b/rllab/tf/regressors/categorical_mlp_regressor.py similarity index 94% rename from sandbox/rocky/tf/regressors/categorical_mlp_regressor.py rename to rllab/tf/regressors/categorical_mlp_regressor.py index 43a522089..ad6ef93f4 100644 --- a/sandbox/rocky/tf/regressors/categorical_mlp_regressor.py +++ b/rllab/tf/regressors/categorical_mlp_regressor.py @@ -1,20 
+1,17 @@ - - - import numpy as np - import tensorflow as tf -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import MLP -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.distributions import Categorical -from sandbox.rocky.tf.optimizers import PenaltyLbfgsOptimizer -from sandbox.rocky.tf.optimizers import LbfgsOptimizer -from sandbox.rocky.tf.optimizers import ConjugateGradientOptimizer -import sandbox.rocky.tf.core.layers as L + from rllab.core import Serializable from rllab.misc import ext from rllab.misc import logger +from rllab.tf.core import LayersPowered +from rllab.tf.core import MLP +import rllab.tf.core.layers as L +from rllab.tf.misc import tensor_utils +from rllab.tf.distributions import Categorical +from rllab.tf.optimizers import PenaltyLbfgsOptimizer +from rllab.tf.optimizers import LbfgsOptimizer +from rllab.tf.optimizers import ConjugateGradientOptimizer NONE = list() diff --git a/sandbox/rocky/tf/regressors/deterministic_mlp_regressor.py b/rllab/tf/regressors/deterministic_mlp_regressor.py similarity index 92% rename from sandbox/rocky/tf/regressors/deterministic_mlp_regressor.py rename to rllab/tf/regressors/deterministic_mlp_regressor.py index 10aed5e77..7ce80384d 100644 --- a/sandbox/rocky/tf/regressors/deterministic_mlp_regressor.py +++ b/rllab/tf/regressors/deterministic_mlp_regressor.py @@ -1,22 +1,16 @@ - - - - - - import numpy as np - import tensorflow as tf -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import MLP -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.distributions import Categorical -from sandbox.rocky.tf.optimizers import PenaltyLbfgsOptimizer -from sandbox.rocky.tf.optimizers import LbfgsOptimizer -import sandbox.rocky.tf.core.layers as L + from rllab.core import Serializable from rllab.misc import ext from rllab.misc import logger +from rllab.tf.core import LayersPowered +from rllab.tf.core import MLP +import 
rllab.tf.core.layers as L +from rllab.tf.distributions import Categorical +from rllab.tf.misc import tensor_utils +from rllab.tf.optimizers import PenaltyLbfgsOptimizer +from rllab.tf.optimizers import LbfgsOptimizer NONE = list() diff --git a/sandbox/rocky/tf/regressors/gaussian_mlp_regressor.py b/rllab/tf/regressors/gaussian_mlp_regressor.py similarity index 97% rename from sandbox/rocky/tf/regressors/gaussian_mlp_regressor.py rename to rllab/tf/regressors/gaussian_mlp_regressor.py index 2089d75f7..88fcc56fa 100644 --- a/sandbox/rocky/tf/regressors/gaussian_mlp_regressor.py +++ b/rllab/tf/regressors/gaussian_mlp_regressor.py @@ -1,15 +1,16 @@ import numpy as np +import tensorflow as tf -import sandbox.rocky.tf.core.layers as L -from sandbox.rocky.tf.core import LayersPowered -from sandbox.rocky.tf.core import MLP -from sandbox.rocky.tf.misc import tensor_utils -from sandbox.rocky.tf.optimizers import LbfgsOptimizer -from sandbox.rocky.tf.optimizers import PenaltyLbfgsOptimizer -from sandbox.rocky.tf.distributions import DiagonalGaussian from rllab.core import Serializable from rllab.misc import logger -import tensorflow as tf +from rllab.tf.core import LayersPowered +from rllab.tf.core import MLP +import rllab.tf.core.layers as L +from rllab.tf.distributions import DiagonalGaussian +from rllab.tf.misc import tensor_utils +from rllab.tf.optimizers import LbfgsOptimizer +from rllab.tf.optimizers import PenaltyLbfgsOptimizer + class GaussianMLPRegressor(LayersPowered, Serializable): diff --git a/rllab/tf/samplers/__init__.py b/rllab/tf/samplers/__init__.py new file mode 100644 index 000000000..3f487fba3 --- /dev/null +++ b/rllab/tf/samplers/__init__.py @@ -0,0 +1,2 @@ +from rllab.tf.samplers.batch_sampler import BatchSampler +from rllab.tf.samplers.vectorized_sampler import VectorizedSampler diff --git a/sandbox/rocky/tf/samplers/batch_sampler.py b/rllab/tf/samplers/batch_sampler.py similarity index 100% rename from sandbox/rocky/tf/samplers/batch_sampler.py rename 
to rllab/tf/samplers/batch_sampler.py diff --git a/sandbox/rocky/tf/samplers/vectorized_sampler.py b/rllab/tf/samplers/vectorized_sampler.py similarity index 97% rename from sandbox/rocky/tf/samplers/vectorized_sampler.py rename to rllab/tf/samplers/vectorized_sampler.py index 7b0591fa0..7e290e95f 100644 --- a/sandbox/rocky/tf/samplers/vectorized_sampler.py +++ b/rllab/tf/samplers/vectorized_sampler.py @@ -1,14 +1,15 @@ +import itertools import pickle +import numpy as np import tensorflow as tf -from rllab.sampler.base import BaseSampler -from sandbox.rocky.tf.envs import ParallelVecEnvExecutor -from sandbox.rocky.tf.envs import VecEnvExecutor + from rllab.misc import tensor_utils -import numpy as np -from rllab.sampler import ProgBarCounter import rllab.misc.logger as logger -import itertools +from rllab.sampler import ProgBarCounter +from rllab.sampler.base import BaseSampler +from rllab.tf.envs import ParallelVecEnvExecutor +from rllab.tf.envs import VecEnvExecutor class VectorizedSampler(BaseSampler): diff --git a/sandbox/rocky/tf/spaces/__init__.py b/rllab/tf/spaces/__init__.py similarity index 100% rename from sandbox/rocky/tf/spaces/__init__.py rename to rllab/tf/spaces/__init__.py diff --git a/sandbox/rocky/tf/spaces/box.py b/rllab/tf/spaces/box.py similarity index 100% rename from sandbox/rocky/tf/spaces/box.py rename to rllab/tf/spaces/box.py diff --git a/sandbox/rocky/tf/spaces/discrete.py b/rllab/tf/spaces/discrete.py similarity index 100% rename from sandbox/rocky/tf/spaces/discrete.py rename to rllab/tf/spaces/discrete.py diff --git a/sandbox/rocky/tf/spaces/product.py b/rllab/tf/spaces/product.py similarity index 100% rename from sandbox/rocky/tf/spaces/product.py rename to rllab/tf/spaces/product.py diff --git a/sandbox/rocky/__init__.py b/sandbox/rocky/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/sandbox/rocky/tf/__init__.py b/sandbox/rocky/tf/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git 
a/sandbox/rocky/tf/algos/__init__.py b/sandbox/rocky/tf/algos/__init__.py deleted file mode 100644 index b7e18bb03..000000000 --- a/sandbox/rocky/tf/algos/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from sandbox.rocky.tf.algos.batch_polopt import BatchPolopt -from sandbox.rocky.tf.algos.npo import NPO -from sandbox.rocky.tf.algos.trpo import TRPO -from sandbox.rocky.tf.algos.vpg import VPG diff --git a/sandbox/rocky/tf/core/__init__.py b/sandbox/rocky/tf/core/__init__.py deleted file mode 100644 index dd872d630..000000000 --- a/sandbox/rocky/tf/core/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from sandbox.rocky.tf.core.parameterized import JointParameterized -from sandbox.rocky.tf.core.parameterized import Parameterized -from sandbox.rocky.tf.core.layers_powered import LayersPowered -from sandbox.rocky.tf.core import layers -from sandbox.rocky.tf.core.network import MLP -from sandbox.rocky.tf.core.network import GRUNetwork -from sandbox.rocky.tf.core.network import ConvNetwork -from sandbox.rocky.tf.core.network import LSTMNetwork diff --git a/sandbox/rocky/tf/distributions/__init__.py b/sandbox/rocky/tf/distributions/__init__.py deleted file mode 100644 index 58ce929a5..000000000 --- a/sandbox/rocky/tf/distributions/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from sandbox.rocky.tf.distributions.bernoulli import Bernoulli -from sandbox.rocky.tf.distributions.categorical import Categorical -from sandbox.rocky.tf.distributions.base import Distribution -from sandbox.rocky.tf.distributions.diagonal_gaussian import DiagonalGaussian -from sandbox.rocky.tf.distributions.recurrent_categorical import RecurrentCategorical -from sandbox.rocky.tf.distributions.recurrent_diagonal_gaussian import RecurrentDiagonalGaussian diff --git a/sandbox/rocky/tf/distributions/recurrent_diagonal_gaussian.py b/sandbox/rocky/tf/distributions/recurrent_diagonal_gaussian.py deleted file mode 100644 index 62fadfca8..000000000 --- a/sandbox/rocky/tf/distributions/recurrent_diagonal_gaussian.py +++ 
/dev/null @@ -1,6 +0,0 @@ - - - -from sandbox.rocky.tf.distributions import DiagonalGaussian - -RecurrentDiagonalGaussian = DiagonalGaussian diff --git a/sandbox/rocky/tf/envs/__init__.py b/sandbox/rocky/tf/envs/__init__.py deleted file mode 100644 index 559ce261d..000000000 --- a/sandbox/rocky/tf/envs/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from sandbox.rocky.tf.envs.base import TfEnv -from sandbox.rocky.tf.envs.base import to_tf_space -from sandbox.rocky.tf.envs.parallel_vec_env_executor import ParallelVecEnvExecutor -from sandbox.rocky.tf.envs.vec_env_executor import VecEnvExecutor diff --git a/sandbox/rocky/tf/optimizers/__init__.py b/sandbox/rocky/tf/optimizers/__init__.py deleted file mode 100644 index 42d780f25..000000000 --- a/sandbox/rocky/tf/optimizers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer -from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp -from sandbox.rocky.tf.optimizers.lbfgs_optimizer import LbfgsOptimizer -from sandbox.rocky.tf.optimizers.first_order_optimizer import FirstOrderOptimizer -from sandbox.rocky.tf.optimizers.penalty_lbfgs_optimizer import PenaltyLbfgsOptimizer diff --git a/sandbox/rocky/tf/policies/__init__.py b/sandbox/rocky/tf/policies/__init__.py deleted file mode 100644 index 77c4b6819..000000000 --- a/sandbox/rocky/tf/policies/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from sandbox.rocky.tf.policies.base import Policy -from sandbox.rocky.tf.policies.base import StochasticPolicy -from sandbox.rocky.tf.policies.categorical_mlp_policy import CategoricalMLPPolicy -from sandbox.rocky.tf.policies.gaussian_gru_policy import GaussianGRUPolicy -from sandbox.rocky.tf.policies.gaussian_lstm_policy import GaussianLSTMPolicy -from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy diff --git a/sandbox/rocky/tf/q_functions/__init__.py b/sandbox/rocky/tf/q_functions/__init__.py deleted 
file mode 100644 index b963f34c0..000000000 --- a/sandbox/rocky/tf/q_functions/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from sandbox.rocky.tf.q_functions.base import QFunction diff --git a/sandbox/rocky/tf/q_functions/base.py b/sandbox/rocky/tf/q_functions/base.py deleted file mode 100644 index ec17d67c8..000000000 --- a/sandbox/rocky/tf/q_functions/base.py +++ /dev/null @@ -1,4 +0,0 @@ -from sandbox.rocky.tf.core import Parameterized - -class QFunction(Parameterized): - pass diff --git a/sandbox/rocky/tf/samplers/__init__.py b/sandbox/rocky/tf/samplers/__init__.py deleted file mode 100644 index 2c14d63e6..000000000 --- a/sandbox/rocky/tf/samplers/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from sandbox.rocky.tf.samplers.batch_sampler import BatchSampler -from sandbox.rocky.tf.samplers.vectorized_sampler import VectorizedSampler diff --git a/tests/test_serializable.py b/tests/test_serializable.py index 3ab2d6104..2b32e2e88 100644 --- a/tests/test_serializable.py +++ b/tests/test_serializable.py @@ -1,7 +1,7 @@ import tensorflow as tf from rllab.core import Serializable -from sandbox.rocky.tf.core.parameterized import Parameterized, suppress_params_loading +from rllab.tf.core.parameterized import Parameterized, suppress_params_loading class Simple(Parameterized, Serializable):