You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import time
import gymnasium as gym
import numpy as np
# Tabular Q-learning on the FrozenLake-v1 environment: one Q-value is kept
# per (state, action) pair and updated after every environment step.
env = gym.make('FrozenLake-v1')
# NOTE(review): rendering is off by default; with gymnasium the render mode
# presumably has to be passed to gym.make() for env.render() to draw anything
# -- confirm before enabling this flag.
render = False
running_reward = None

# Initialise the Q-table with zeros: rows are states, columns are actions.
Q = np.zeros([env.observation_space.n, env.action_space.n])

# Hyper-parameters.
lr = .85             # learning rate applied to the temporal-difference error
lambd = .99          # discount factor applied to the best next-state value
num_episodes = 10000
rList = []           # total reward collected in each episode

# ================= Start training ===================== #
for i in range(num_episodes):
    episode_time = time.time()
    # env.reset() returns a (state, info) tuple; only the state is needed.
    s = env.reset()[0]
    rAll = 0
    # Cap every episode at 99 steps.
    for _ in range(99):
        if render:
            env.render()
        # Action selection: greedy on the Q-values plus Gaussian noise whose
        # scale shrinks as 1 / (i + 1), so exploration fades over episodes.
        a = np.argmax(Q[s, :] + np.random.randn(1, env.action_space.n) * (1. / (i + 1)))
        # Step the environment to obtain the next state and the reward.
        s1, r, terminated, truncated, _ = env.step(a)
        # The episode is over on either termination or truncation.
        d = terminated or truncated
        # Q-learning update towards r + lambd * max_a' Q(s1, a').
        Q[s, a] = Q[s, a] + lr * (r + lambd * np.max(Q[s1, :]) - Q[s, a])
        rAll += r
        s = s1
        if d:
            break
    rList.append(rAll)
    # Exponential moving average of the episode reward (weight 0.01 on the
    # newest episode), seeded with the first episode's reward.
    running_reward = rAll if running_reward is None else running_reward * 0.99 + rAll * 0.01
    print("Episode [%d/%d] sum reward: %f running reward: %f took: %.5fs " %
          (i, num_episodes, rAll, running_reward, time.time() - episode_time))

# Print the final Q-table (the original used "/n", which printed a literal
# slash-n instead of starting a new line).
print("Final Q-Table Values:\n %s" % Q)

# Release the environment's resources once training is finished.
env.close()
主要改动是使用 `import gymnasium as gym`。gym 是由 OpenAI 开发的,而 gymnasium 是由 Farama Foundation 开发的,基本上继承了 gym。从现在开始应该使用 gymnasium,因为它仍在积极开发中。使用 gym 会出现和新版 numpy 不兼容的情况,报错:`module 'numpy' has no attribute 'bool8'. Did you mean: 'bool'?`
The text was updated successfully, but these errors were encountered:
gymnasium==0.29.1
numpy==2.1.1
主要改动是使用 `import gymnasium as gym`。gym 是由 OpenAI 开发的,而 gymnasium 是由 Farama Foundation 开发的,基本上继承了 gym。从现在开始应该使用 gymnasium,因为它仍在积极开发中。使用 gym 会出现和新版 numpy 不兼容的情况,报错:`module 'numpy' has no attribute 'bool8'. Did you mean: 'bool'?`

The text was updated successfully, but these errors were encountered: