diff --git a/setup.py b/setup.py index fa1d03f31..5a1846697 100644 --- a/setup.py +++ b/setup.py @@ -203,11 +203,11 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str: "torch>=1.4.0", "tqdm", "scikit-learn>=0.21.2", - "seals>=0.1.5", + "seals~=0.1.5", STABLE_BASELINES3, "sacred>=0.8.4", "tensorboard>=1.14", - "huggingface_sb3>=2.2.1", + "huggingface_sb3~=2.3", "datasets>=2.8.0", ], tests_require=TESTS_REQUIRE, diff --git a/tests/algorithms/test_mce_irl.py b/tests/algorithms/test_mce_irl.py index 53b9527df..9fd527c41 100644 --- a/tests/algorithms/test_mce_irl.py +++ b/tests/algorithms/test_mce_irl.py @@ -132,6 +132,8 @@ def test_infinite_horizon_error(random_mdp, rng): def test_policy_om_random_mdp(discount: float): """Test that optimal policy occupancy measure ("om") for a random MDP is sane.""" mdp = gym.make("seals/Random-v0") + mdp.seed(0) + V, Q, pi = mce_partition_fh(mdp, discount=discount) assert np.all(np.isfinite(V)) assert np.all(np.isfinite(Q))