Merge pull request #30 from GeoML-SIG/feature/better_env

Updated Environment and Notebooks
GeoML-SIG · Aug 8, 2022 · e270154
2 parents d47f712 + da642c6
commit e270154
Show file tree

Hide file tree

Showing 6 changed files with 444 additions and 1,490 deletions.
diff --git a/examples/01_rl_hello_world_cartpole.ipynb b/examples/01_rl_hello_world_cartpole.ipynb
@@ -271,6 +271,7 @@
     "\n",
     "More information about PPO:\n",
     "* [PPO Paper](https://arxiv.org/abs/1707.06347)\n",
+    "* [Understanding PPO Plots](https://medium.com/aureliantactics/understanding-ppo-plots-in-tensorboard-cbc3199b9ba2)\n",
     "* [OpenAI Spinning Up PPO](https://spinningup.openai.com/en/latest/algorithms/ppo.html)\n",
     "* [PPO Stable Baselines3](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html)\n",
     "* [PPO Explained - Article](https://jonathan-hui.medium.com/rl-proximal-policy-optimization-ppo-explained-77f014ec3f12)\n",

diff --git a/examples/02_gitc_driller.ipynb b/examples/02_gitc_driller.ipynb
diff --git a/examples/model_gen.ipynb b/examples/03_procedural_model_gen.ipynb
diff --git a/examples/GITCDRILLER.ipynb b/examples/GITCDRILLER.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "wildcatter"
-version = "0.0.4"
+version = "0.1.0"
 description = "Wildcatter"
 authors = ["Altay Sansal <[email protected]>"]
 license = "Apache-2.0"

diff --git a/src/wildcatter/environment.py b/src/wildcatter/environment.py
@@ -42,81 +42,43 @@ def step(  # noqa: C901
         self, action: int
     ) -> tuple[NDArray[np.bool_], int, bool, dict[str, Any]]:
         """Take step based on action."""
+        done = False
         actions = {
             0: [1, 0],  # down
             1: [0, -1],  # left
             2: [0, 1],  # right
             3: [-1, 0],  # up
         }
 
-        def take_action(loc: list[int], action: int) -> tuple[list[int], bool]:
-            """Convenience function for taking action."""
-            available_actions = list(actions.keys())
-            stuck = False
-
-            if loc[0] <= 1:
-                available_actions.remove(3)
-
-            if loc[0] == (self.nrow - 1):
-                available_actions.remove(0)
-
-            if loc[1] == 0:
-                available_actions.remove(1)
-
-            if loc[1] == (self.ncol - 1):
-                available_actions.remove(2)
-
-            if action not in available_actions:
-                action = random.choice(available_actions)  # noqa: S311
-
-            available_actions.remove(action)
-            change = actions[action]
-            new_location = [old + new for old, new in zip(loc, change)]
-
-            if new_location in self.trajectory:
-                collision = True
-                while collision:
-                    try:
-                        new_action = random.choice(available_actions)  # noqa: S311
-                    except IndexError:
-                        stuck = True
-                        break
-
-                    try:
-                        available_actions.remove(new_action)  # noqa: S311
-                    except ValueError:
-                        stuck = True
-                        break
+        dz_dx = actions[action]
+        new_location = [prev + now for prev, now in zip(self.bit_location, dz_dx)]
 
-                    change = actions[new_action]
-                    new_location = [old + new for old, new in zip(loc, change)]
+        self.bit_location = new_location
 
-                    if new_location not in self.trajectory:
-                        collision = False
+        self.trajectory.append(new_location)
+        newrow, newcol = new_location
 
-            return new_location, stuck
+        self.pipe_used += 1
 
-        new_location, stuck = take_action(self.bit_location, action)
+        if newrow < 1 or newrow >= self.nrow:
+            done = True
+            reward = -100
 
-        done = False
-        if stuck:
+        elif newcol < 0 or newcol >= self.ncol:
             done = True
-            reward = 0
+            reward = -100
 
         else:
-            self.bit_location = new_location
-
-            self.trajectory.append(new_location)
-            newrow, newcol = new_location
-
-            reward = self.model[newrow, newcol]
-
+            reward = self.model[newrow, newcol] + self.pipe_used / 2
             self.update_state()
 
-            self.pipe_used += 1
+        if self.pipe_used == self.available_pipe:
+            done = True
+            reward = 0
 
-            if self.pipe_used == self.available_pipe:
-                done = True
+        if self.bit_location in self.trajectory[:-1]:
+            done = True
+            reward = -100
 
         info: dict[str, Any] = {}