understanding-search · mivanit · Apr 28, 2023 · Mar 28, 2023 · Mar 28, 2023 · Mar 28, 2023
diff --git a/makefile b/makefile
@@ -51,6 +51,7 @@ convert_notebooks:
 
 .PHONY: test_notebooks
 test_notebooks: convert_notebooks
+	@echo "run tests on converted notebooks in $(CONVERTED_NOTEBOOKS_TEMP_DIR) using $(HELPERS_DIR)/run_notebook_tests.py"
 	python $(HELPERS_DIR)/run_notebook_tests.py --notebooks-dir=$(NOTEBOOKS_DIR) --converted-notebooks-temp-dir=$(CONVERTED_NOTEBOOKS_TEMP_DIR)
 
 

diff --git a/maze_transformer/evaluation/baseline_models.py b/maze_transformer/evaluation/baseline_models.py
@@ -60,7 +60,7 @@ def _predict_next_step(
         unvisited_neighbors = [coord for coord in neighbors if coord not in path]
 
         # if the current path is already as long as the solution, there can be no correct next step
-        correct_step = solution[len(path)] if len(solution) > len(path) else None
+        correct_step = tuple(solution[len(path)]) if len(solution) > len(path) else None
 
         if len(unvisited_neighbors) == 0:
             return SPECIAL_TOKENS["path_end"]
@@ -89,7 +89,7 @@ def _generate_path(
         maze = LatticeMaze.from_tokens(tokens)
         origin_coord = self.config.dataset_cfg.token_node_map[get_origin_token(tokens)]
         target_coord = self.config.dataset_cfg.token_node_map[get_target_token(tokens)]
-        solution = maze.find_shortest_path(origin_coord, target_coord)
+        solution = maze.find_shortest_path(origin_coord, target_coord).tolist()
 
         existing_path = tokens_to_coords(
             get_path_tokens(tokens), self.config.dataset_cfg

diff --git a/maze_transformer/evaluation/eval_model.py b/maze_transformer/evaluation/eval_model.py
@@ -10,7 +10,6 @@
 
 from maze_transformer.evaluation.path_evals import PathEvalFunction, PathEvals
 from maze_transformer.generation.constants import SPECIAL_TOKENS
-from maze_transformer.generation.lattice_maze import SolvedMaze
 from maze_transformer.training.config import ConfigHolder
 from maze_transformer.training.maze_dataset import MazeDataset, MazeDatasetConfig
 from maze_transformer.training.training import TRAIN_SAVE_FILES
@@ -150,15 +149,12 @@ def evaluate_model(
         name: StatCounter() for name in eval_functions.keys()
     }
 
-    for batch in chunks(dataset.mazes_tokens, batch_size):
-        # TODO: This won't be needed after #124, then we can call mazes_objs instead
-        # https://github.com/orgs/AISC-understanding-search/projects/1/views/1?pane=issue&itemId=23879308
-        solved_mazes: SolvedMaze = [
-            SolvedMaze.from_tokens(tokens, dataset.cfg) for tokens in batch
+    for maze_batch in chunks(dataset, batch_size):
+        tokens_batch = [
+            maze.as_tokens(dataset.cfg.node_token_map) for maze in maze_batch
         ]
-
         predictions = predict_maze_paths(
-            tokens_batch=batch,
+            tokens_batch=tokens_batch,
             data_cfg=dataset.cfg,
             model=model,
             max_new_tokens=max_new_tokens,
@@ -173,7 +169,7 @@ def evaluate_model(
                     prediction=np.array(prediction),
                     model=model,
                 )
-                for sm, prediction in zip(solved_mazes, predictions)
+                for sm, prediction in zip(maze_batch, predictions)
             )
 
     return score_counters
diff --git a/maze_transformer/evaluation/maze_complexity_evals.py b/maze_transformer/evaluation/maze_complexity_evals.py
@@ -0,0 +1,16 @@
+import typing
+
+from maze_transformer.generation.lattice_maze import SolvedMaze
+from maze_transformer.utils.utils import register_method
+
+# Dict handed to `register_method` below (presumably filled with each decorated metric -- confirm in utils).
+MAZE_COMPLEXITY_EVALS: dict[str, typing.Callable[[SolvedMaze], float]] = dict()
+
+
+class MazeComplexityEvals:
+    """Namespace for metrics that take a `SolvedMaze` and return a float."""
+
+    @register_method(MAZE_COMPLEXITY_EVALS)
+    def solution_length(maze: SolvedMaze) -> float:
+        """Return the number of entries in `maze.solution`, as a float to match the registry's type."""
+        return float(len(maze.solution))
diff --git a/maze_transformer/evaluation/path_evals.py b/maze_transformer/evaluation/path_evals.py
@@ -1,27 +1,26 @@
-from typing import Iterable, Optional, Protocol, TypeAlias
+import typing
 
 import numpy as np
-from jaxtyping import Int
 
 from maze_transformer.generation.constants import Coord, CoordArray, CoordTup
 from maze_transformer.generation.lattice_maze import LatticeMaze
 from maze_transformer.utils.utils import register_method
 
 # pylint: disable=unused-argument
-MazePath: TypeAlias = Int[np.ndarray, "node x_y_pos"]
+MazePath = CoordArray
 
 
-class PathEvalFunction(Protocol):
+class PathEvalFunction(typing.Protocol):
     def __call__(
         self,
-        maze: Optional[LatticeMaze] = None,
-        solution: Optional[CoordArray] = None,
-        prediction: Optional[CoordArray] = None,
+        maze: LatticeMaze | None = None,
+        solution: CoordArray | None = None,
+        prediction: CoordArray | None = None,
     ) -> float:
         ...
 
 
-def path_as_segments_iter(path: CoordArray) -> Iterable[tuple]:
+def path_as_segments_iter(path: CoordArray) -> typing.Iterable[tuple]:
     """
     Iterate over the segments of a path (ie each consecutive pair).
     """

diff --git a/maze_transformer/generation/generators.py b/maze_transformer/generation/generators.py
@@ -1,13 +1,14 @@
 import random
+import warnings
 from typing import Any, Callable
 
 import numpy as np
 
 from maze_transformer.generation.constants import CoordArray
 from maze_transformer.generation.lattice_maze import (
     NEIGHBORS_MASK,
+    ConnectionList,
     Coord,
-    CoordTup,
     LatticeMaze,
     SolvedMaze,
 )
@@ -18,9 +19,11 @@ class LatticeMazeGenerators:
 
     @staticmethod
     def gen_dfs(
-        grid_shape: Coord | CoordTup,
-        start_coord: Coord | None = None,
+        grid_shape: Coord,
         lattice_dim: int = 2,
+        n_accessible_cells: int | None = None,
+        max_tree_depth: int | None = None,
+        start_coord: Coord | None = None,
     ) -> LatticeMaze:
         """generate a lattice maze using depth first search, iterative
 
@@ -35,28 +38,39 @@ def gen_dfs(
                         4. Mark the chosen cell as visited and push it to the stack
         """
 
-        grid_shape = np.array(grid_shape)
-
-        # initialize the maze with no connections
-        connection_list: np.ndarray = np.zeros(
-            (lattice_dim, grid_shape[0], grid_shape[1]), dtype=bool
-        )
-
+        # Default values if no constraints have been passed
+        grid_shape: Coord = np.array(grid_shape)
+        n_total_cells: int = int(np.prod(grid_shape))
+        if n_accessible_cells is None:
+            n_accessible_cells = n_total_cells
+        if max_tree_depth is None:
+            # The depth check in the main loop compares against max_tree_depth / 2,
+            # so this default leaves the depth effectively unconstrained.
+            max_tree_depth = 2 * n_total_cells
         if start_coord is None:
-            start_coord: Coord = (
-                random.randint(0, grid_shape[0] - 1),
-                random.randint(0, grid_shape[1] - 1),
+            start_coord: Coord = np.random.randint(
+                0,
+                grid_shape,  # `high` is exclusive, so this already spans every row and column
+                size=2,
             )
+        else:
+            start_coord = np.array(start_coord)
 
-        # print(f"{grid_shape = } {start_coord = }")
+        # initialize the maze with no connections
+        connection_list: ConnectionList = np.zeros(
+            (lattice_dim, grid_shape[0], grid_shape[1]), dtype=np.bool_
+        )
 
         # initialize the stack with the target coord
         visited_cells: set[tuple[int, int]] = set()
         visited_cells.add(tuple(start_coord))
         stack: list[Coord] = [start_coord]
 
-        # loop until the stack is empty
-        while stack:
+        # initialize tree_depth_counter
+        current_tree_depth: int = 1
+
+        # loop until the stack is empty or n_accessible_cells cells have been visited
+        while stack and (len(visited_cells) < n_accessible_cells):
             # get the current coord from the stack
             current_coord: Coord = stack.pop()
 
@@ -73,7 +87,10 @@ def gen_dfs(
                 )
             ]
 
-            if unvisited_neighbors_deltas:
+            # don't continue if max_tree_depth/2 is already reached (divide by 2 because we can branch to multiple directions)
+            if unvisited_neighbors_deltas and (
+                current_tree_depth <= max_tree_depth / 2
+            ):
                 stack.append(current_coord)
 
                 # choose one of the unvisited neighbors
@@ -92,22 +109,24 @@ def gen_dfs(
                 visited_cells.add(tuple(chosen_neighbor))
                 stack.append(chosen_neighbor)
 
+                # Update current tree depth
+                current_tree_depth += 1
+            else:
+                current_tree_depth -= 1
+
         return LatticeMaze(
             connection_list=connection_list,
             generation_meta=dict(
                 func_name="gen_dfs",
                 grid_shape=grid_shape,
                 start_coord=start_coord,
+                visited_cells=visited_cells,
+                n_accessible_cells=n_accessible_cells,
+                max_tree_depth=max_tree_depth,
+                fully_connected=(len(visited_cells) == n_accessible_cells),
             ),
         )
 
-    @classmethod
-    def gen_dfs_with_solution(cls, grid_shape: Coord) -> SolvedMaze:
-        maze: LatticeMaze = cls.gen_dfs(grid_shape)
-        solution: CoordArray = np.array(maze.generate_random_path())
-
-        return SolvedMaze.from_lattice_maze(lattice_maze=maze, solution=solution)
-
     @staticmethod
     def gen_wilson(
         grid_shape: Coord,
@@ -137,9 +156,9 @@ def neighbor(current: Coord, direction: int) -> Coord:
 
         # A connection list only contains two elements: one boolean matrix indicating all the
         # downwards connections in the maze, and one boolean matrix indicating the rightwards connections.
-        connection_list: np.ndarray = np.zeros((2, rows, cols), dtype=bool)
+        connection_list: np.ndarray = np.zeros((2, rows, cols), dtype=np.bool_)
 
-        connected = np.zeros(grid_shape, dtype=bool)
+        connected = np.zeros(grid_shape, dtype=np.bool_)
         direction_matrix = np.zeros(grid_shape, dtype=int)
 
         # Mark a random cell as connected
@@ -198,12 +217,36 @@ def neighbor(current: Coord, direction: int) -> Coord:
             generation_meta=dict(
                 func_name="gen_wilson",
                 grid_shape=grid_shape,
+                fully_connected=True,
             ),
         )
 
+    @classmethod
+    def gen_dfs_with_solution(cls, grid_shape: Coord) -> SolvedMaze:
+        """Deprecated: use `get_maze_with_solution("gen_dfs", grid_shape)` instead."""
+        warnings.warn(
+            "gen_dfs_with_solution is deprecated, use get_maze_with_solution instead",
+            DeprecationWarning,
+            # attribute the warning to the caller rather than to this shim
+            stacklevel=2,
+        )
+        return get_maze_with_solution("gen_dfs", grid_shape)
+
 
 # TODO: use the thing @valedan wrote for the evals function to make this automatic?
 GENERATORS_MAP: dict[str, Callable[[Coord, Any], "LatticeMaze"]] = {
     "gen_dfs": LatticeMazeGenerators.gen_dfs,
     "gen_wilson": LatticeMazeGenerators.gen_wilson,
 }
+
+
+def get_maze_with_solution(
+    gen_name: str,
+    grid_shape: Coord,
+    maze_ctor_kwargs: dict | None = None,
+) -> SolvedMaze:
+    """Build a maze with `GENERATORS_MAP[gen_name]` and attach a random path through it as the solution."""
+    generator = GENERATORS_MAP[gen_name]
+    maze: LatticeMaze = generator(grid_shape, **(maze_ctor_kwargs or {}))
+    random_path: CoordArray = np.array(maze.generate_random_path())
+    return SolvedMaze.from_lattice_maze(lattice_maze=maze, solution=random_path)