Skip to content

Commit

Permalink
Merge pull request #30 from GeoML-SIG/feature/better_env
Browse files: browse the repository at this point in the history
Updated Environment and Notebooks
  • Loading branch information
tasansal authored Aug 8, 2022
2 parents d47f712 + da642c6 commit e270154
Show file tree
Hide file tree
Showing 6 changed files with 444 additions and 1,490 deletions.
1 change: 1 addition & 0 deletions examples/01_rl_hello_world_cartpole.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@
"\n",
"More information about PPO:\n",
"* [PPO Paper](https://arxiv.org/abs/1707.06347)\n",
"* [Understanding PPO Plots](https://medium.com/aureliantactics/understanding-ppo-plots-in-tensorboard-cbc3199b9ba2)\n",
"* [OpenAI Spinning Up PPO](https://spinningup.openai.com/en/latest/algorithms/ppo.html)\n",
"* [PPO Stable Baselines3](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html)\n",
"* [PPO Explained - Article](https://jonathan-hui.medium.com/rl-proximal-policy-optimization-ppo-explained-77f014ec3f12)\n",
Expand Down
423 changes: 423 additions & 0 deletions examples/02_gitc_driller.ipynb

Large diffs are not rendered by default.

File renamed without changes.
1,432 changes: 0 additions & 1,432 deletions examples/GITCDRILLER.ipynb

This file was deleted.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "wildcatter"
version = "0.0.4"
version = "0.1.0"
description = "Wildcatter"
authors = ["Altay Sansal <[email protected]>"]
license = "Apache-2.0"
Expand Down
76 changes: 19 additions & 57 deletions src/wildcatter/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,81 +42,43 @@ def step( # noqa: C901
self, action: int
) -> tuple[NDArray[np.bool_], int, bool, dict[str, Any]]:
"""Take step based on action."""
done = False
actions = {
0: [1, 0], # down
1: [0, -1], # left
2: [0, 1], # right
3: [-1, 0], # up
}

def take_action(loc: list[int], action: int) -> tuple[list[int], bool]:
"""Convenience function for taking action."""
available_actions = list(actions.keys())
stuck = False

if loc[0] <= 1:
available_actions.remove(3)

if loc[0] == (self.nrow - 1):
available_actions.remove(0)

if loc[1] == 0:
available_actions.remove(1)

if loc[1] == (self.ncol - 1):
available_actions.remove(2)

if action not in available_actions:
action = random.choice(available_actions) # noqa: S311

available_actions.remove(action)
change = actions[action]
new_location = [old + new for old, new in zip(loc, change)]

if new_location in self.trajectory:
collision = True
while collision:
try:
new_action = random.choice(available_actions) # noqa: S311
except IndexError:
stuck = True
break

try:
available_actions.remove(new_action) # noqa: S311
except ValueError:
stuck = True
break
dz_dx = actions[action]
new_location = [prev + now for prev, now in zip(self.bit_location, dz_dx)]

change = actions[new_action]
new_location = [old + new for old, new in zip(loc, change)]
self.bit_location = new_location

if new_location not in self.trajectory:
collision = False
self.trajectory.append(new_location)
newrow, newcol = new_location

return new_location, stuck
self.pipe_used += 1

new_location, stuck = take_action(self.bit_location, action)
if newrow < 1 or newrow >= self.nrow:
done = True
reward = -100

done = False
if stuck:
elif newcol < 0 or newcol >= self.ncol:
done = True
reward = 0
reward = -100

else:
self.bit_location = new_location

self.trajectory.append(new_location)
newrow, newcol = new_location

reward = self.model[newrow, newcol]

reward = self.model[newrow, newcol] + self.pipe_used / 2
self.update_state()

self.pipe_used += 1
if self.pipe_used == self.available_pipe:
done = True
reward = 0

if self.pipe_used == self.available_pipe:
done = True
if self.bit_location in self.trajectory[:-1]:
done = True
reward = -100

info: dict[str, Any] = {}

Expand Down

0 comments on commit e270154

Please sign in to comment.