new feature

aeblyve · Dec 15, 2021 · 2e438cb · 2e438cb
1 parent 31648b3
commit 2e438cb
Show file tree

Hide file tree

Showing 3 changed files with 127 additions and 41 deletions.
diff --git a/code/bot/__pycache__/simpletetris.cpython-310.pyc b/code/bot/__pycache__/simpletetris.cpython-310.pyc
diff --git a/code/bot/qlearning.py b/code/bot/qlearning.py
@@ -1,6 +1,7 @@
 import random
 
-#Possible Future Features: Holes, Height
+# Possible Future Features: Holes, Height
+
 
 def grid_count_bool(grid, test):
     count = 0
@@ -10,12 +11,15 @@ def grid_count_bool(grid, test):
                 count += 1
     return count
 
+
 def c_count(state):
     return grid_count_bool(state.grid, "c")
 
+
 def b_count(state):
     return grid_count_bool(state.grid, "b")
 
+
 def w_something(state):
     total = 0
     r = 1.05
@@ -26,26 +30,35 @@ def w_something(state):
             if col != "_":
                 count += 1
         total += count * (r ** rind)
-    return total/70
+    return total / 70
+
 
 def stable(state):
     return 1
 
+
 def max_height(state):
     for rind in range(len(state.grid)):
         for col in state.grid[rind]:
-            if col != '_':
+            if col != "_":
                 return len(state.grid) - rind - 1
-#for some reason "unstable" might be a bug in QLearningAgent
+
+
+# For some reason this feature is "unstable"; it might be a bug in QLearningAgent.
 def weak_hole_count(state):
     count = 0
     grid = state.grid
     for row in range(1, len(grid) - 1):
         for col in range(len(grid[row])):
-            if grid[row][col] == "_" and grid[row + 1][col] != "_" and grid[row - 1][col] != "_":
+            if (
+                grid[row][col] == "_"
+                and grid[row + 1][col] != "_"
+                and grid[row - 1][col] != "_"
+            ):
                 count += 1
     return count
 
+
 def weaker_hole_count(state):
     count = 0
     grid = state.grid
@@ -55,7 +68,8 @@ def weaker_hole_count(state):
                 count += 1
     return count
 
-#takado8 on github use this
+
+# takado8 on GitHub uses this feature
 def bumpiness(state):
     firstblock = []
     grid = state.grid
@@ -68,19 +82,30 @@ def bumpiness(state):
         firstblock += [count]
 
     bump = 0
-    for i in range(len(firstblock) -1):
+    for i in range(len(firstblock) - 1):
         bump += abs(firstblock[i + 1] - firstblock[i])
     return bump
 
+
 def c_cleared_reward(before, after):
     return c_count(before) - c_count(after)
 
+
 def any_cleared_reward(before, after):
     return 3 + grid_count_bool(after.grid, "_") - grid_count_bool(before.grid, "_")
 
+
+def c_open(state):
+    return state.top_row().count("c")
+
+
 features = []
+
+
 class Player:
-    def __init__(self, evaluator, numsteps=100, numtrails=1, epsilon=.05):
+    pass
+
+    def __init__(self, evaluator, numsteps=100, numtrails=1, epsilon=0.05):
         self.evaluator = evaluator
         self.numsteps = numsteps
         self.numtrails = numtrails
@@ -101,7 +126,7 @@ def play_game(self, game):
                     break
                 self.evaluator.update(state, succesor)
                 totalreward += succesor[2]
-                #print(state)
+                # print(state)
                 state = succesor[0]
             print("Trial: " + str(i) + "/" + str(self.numtrails))
             print("Game Done, End Board")
@@ -125,10 +150,8 @@ def pick_succesor(self, game, state):
         return successors[maxind]
 
 
-
-
 class QFeatureAgent:
-    def __init__(self, features, alpha= .00001, gamma = .96):
+    def __init__(self, features, alpha=0.00001, gamma=0.96):
         self.features = features
         self.weights = dict()
         self.alpha = alpha
@@ -148,36 +171,43 @@ def value(self, state):
             total += self.weights[feature] * feature(state)
         return total
 
-    #succesor tuple of (next state, action, reward)
+    # successor is a tuple of (next state, action, reward)
     def update(self, original, successor):
-        difference = successor[2] + self.gamma * self.value(successor[0]) - self.value(original)
+        difference = (
+            successor[2] + self.gamma * self.value(successor[0]) - self.value(original)
+        )
         for feature in self.features:
-            self.weights[feature] = self.weights[feature] + self.alpha * difference * feature(original)
+            self.weights[feature] = self.weights[
+                feature
+            ] + self.alpha * difference * feature(original)
         difference = difference + 0
 
 
-#PROBLEM: Instability with max_hegiht and weak_hole_count,
+# PROBLEM: Instability with max_height and weak_hole_count.
 # Is it a bug or a consequence of how the problem is set up? Will investigate later.
 from simpletetris import CheeseGameLocked, Piece
-tqfeature = QFeatureAgent([c_count, b_count, max_height, weaker_hole_count, bumpiness, w_something, stable])
+
+tqfeature = QFeatureAgent(
+    [c_count, b_count, max_height, weaker_hole_count, bumpiness, w_something, stable]
+)
 "Below I've listed where the values fall, as close as this is going to get, not good enough, need more"
 "Note that below numbers are on top are for 100 (basically until it can't)"
 " moves which due to suboptimality changes numbers, perhaps lower trials"
 "more accurate"
-#tqfeature.set_weight(c_count, 1.6)
-#tqfeature.set_weight(b_count, 0.5)
-#tqfeature.set_weight(max_height, -.25)
-#tqfeature.set_weight(weaker_hole_count, -1.7)
-#tqfeature.set_weight(bumpiness, -.15)
-#tqfeature.set_weight(w_something, 1)
-#tqfeature.set_weight(stable, 0)
-
-tqfeature.set_weight(c_count, .5)
+# tqfeature.set_weight(c_count, 1.6)
+# tqfeature.set_weight(b_count, 0.5)
+# tqfeature.set_weight(max_height, -.25)
+# tqfeature.set_weight(weaker_hole_count, -1.7)
+# tqfeature.set_weight(bumpiness, -.15)
+# tqfeature.set_weight(w_something, 1)
+# tqfeature.set_weight(stable, 0)
+
+tqfeature.set_weight(c_count, 0.5)
 tqfeature.set_weight(b_count, -0.035)
-tqfeature.set_weight(max_height, -.35)
+tqfeature.set_weight(max_height, -0.35)
 tqfeature.set_weight(weaker_hole_count, -1.85)
-tqfeature.set_weight(bumpiness, -.1)
-tqfeature.set_weight(w_something, .94)
+tqfeature.set_weight(bumpiness, -0.1)
+tqfeature.set_weight(w_something, 0.94)
 tqfeature.set_weight(stable, 0)
 tplayer = Player(tqfeature, 10000, 1000, 0.005)
 game = CheeseGameLocked(10, 20, 1, 9)

diff --git a/code/bot/simpletetris.py b/code/bot/simpletetris.py
@@ -1,6 +1,8 @@
 import random
 
 "Abstraction for Piece, Stores necessary data and some basic functions"
+
+
 class Piece:
     def __init__(self, center, positions, label):
         self.center = center
@@ -24,7 +26,11 @@ def change_center(self, shiftamount):
         for x, y in self.positions:
             new_piece.append((x + change_x, y + change_y))
         new_piece = tuple(new_piece)
-        return Piece((self.center[0] + change_x, self.center[1] + change_y), new_piece, self.label)
+        return Piece(
+            (self.center[0] + change_x, self.center[1] + change_y),
+            new_piece,
+            self.label,
+        )
 
     def bound_ranges(self):
         max_x = None
@@ -42,7 +48,10 @@ def bound_ranges(self):
                 min_y = y
         return (min_x, max_x), (min_y, max_y)
 
+
 "Class for building Tetris grids; supports clogging with random trash"
+
+
 class TetrisGridBuilder:
     def __init__(self):
         self.grid = [[]]
@@ -63,8 +72,14 @@ def set_blank_grid(self, width, height, blank):
         self.set_grid(grid)
         return self
 
-    def new_cheese_state(self,
-            x_dimension=10, y_dimension=20, hole_count=1, cheese_count=9, cheese_char="c"):
+    def new_cheese_state(
+        self,
+        x_dimension=10,
+        y_dimension=20,
+        hole_count=1,
+        cheese_count=9,
+        cheese_char="c",
+    ):
         self.set_blank_grid(x_dimension, y_dimension, "_")
 
         for g in range(y_dimension - cheese_count, y_dimension):
@@ -81,7 +96,10 @@ def new_cheese_state(self,
     def build(self):
         return TetrisGrid(tuple([tuple(row) for row in self.grid]), self.blank)
 
+
 "Class for Grid"
+
+
 class Grid:
     def __init__(self, grid, blank):
         self.grid = grid
@@ -102,6 +120,7 @@ def __repr__(self):
             out += " " + str(i) + " \n"
         return out
 
+
 def row_is_full(row, blank):
     for block in row:
         if block == blank:
@@ -119,19 +138,22 @@ def clear_rows(grid, blank):
         filtered.insert(0, blank_row)
     return tuple(filtered)
 
+
 "Represents the Tetris Grid"
+
+
 class TetrisGrid(Grid):
-   #to drop from sky, merely place piece in the clouds
+    # to drop from sky, merely place piece in the clouds
 
     def hard_drop(self, piece):
         y_delta = float("inf")
 
         for x, y in piece.positions:
-             for y_i in range(y, len(self.grid)):
+            for y_i in range(y, len(self.grid)):
                 if y_i >= 0 and self.grid[y_i][x] != self.blank:
                     y_delta = min(y_delta, y_i - y)
                     break
-             y_delta = min(y_delta, len(self.grid) - y)
+            y_delta = min(y_delta, len(self.grid) - y)
 
         new_piece = piece.change_center((0, y_delta - 1))
         return self.lock_piece(new_piece)
@@ -146,14 +168,29 @@ def lock_piece(self, piece):
         newgrid = clear_rows(newgrid, self.blank)
         return TetrisGrid(tuple([tuple(row) for row in newgrid]), self.blank)
 
-    #See if a given thing is contained in the grid
+    # Return True if the given symbol appears anywhere in the grid
     def contains(self, symbol):
         for row in self.grid:
             for col in row:
                 if col == symbol:
                     return True
         return False
 
+    def top_row(self):
+        """Return a tuple of each column's topmost non-blank cell (blank if the column is empty)."""
+        row = []
+        for x in range(len(self.grid[0])):
+            empty = True
+            for y in range(len(self.grid)):
+                if self.grid[y][x] != self.blank:
+                    row.append(self.grid[y][x])
+                    empty = False
+                    break
+            if empty:
+                row.append(self.blank)
+        return tuple(row)
+
+
 T_PIECE = Piece((0, 0), ((0, -1), (-1, 0), (0, 0), (1, 0)), "b")
 L_PIECE = Piece((0, 0), ((0, 0), (1, 0), (0, -1), (0, -2)), "b")
 J_PIECE = Piece((0, 0), ((0, 0), (-1, 0), (0, -1), (0, -2)), "b")
@@ -164,10 +201,18 @@ def contains(self, symbol):
 
 PIECES = [T_PIECE, L_PIECE, J_PIECE, O_PIECE, S_PIECE, Z_PIECE, I_PIECE]
 
+
 class CheeseGameLocked:
     """Clear all the cheese to win."""
 
-    def __init__(self, x_dimension=10, y_dimension=20, hole_count=1, cheese_count=9, pieces=PIECES):
+    def __init__(
+        self,
+        x_dimension=10,
+        y_dimension=20,
+        hole_count=1,
+        cheese_count=9,
+        pieces=PIECES,
+    ):
         self.x_dimension = x_dimension
         self.y_dimension = y_dimension
         self.hole_count = hole_count
@@ -176,8 +221,17 @@ def __init__(self, x_dimension=10, y_dimension=20, hole_count=1, cheese_count=9,
         self.reward = lambda before, after: 0
 
     def get_start_state(self):
-        return TetrisGridBuilder().new_cheese_state(self.x_dimension, self.y_dimension,
-                                              self.hole_count, self.cheese_count, "c").build()
+        return (
+            TetrisGridBuilder()
+            .new_cheese_state(
+                self.x_dimension,
+                self.y_dimension,
+                self.hole_count,
+                self.cheese_count,
+                "c",
+            )
+            .build()
+        )
 
     def set_reward(self, reward):
         self.reward = reward
@@ -195,10 +249,12 @@ def get_successors(self, state, given=None):
             for offset in range(len(state.grid[0]) - xrange[1] + xrange[0]):
                 newstate = state.hard_drop(piece.change_center((offset, 0)))
                 if newstate not in explored:
-                    successors.append((newstate, (i, offset), self.reward(state, newstate)))
+                    successors.append(
+                        (newstate, (i, offset), self.reward(state, newstate))
+                    )
             piece = piece.rotate_left()
 
         return successors
 
     def is_goal(self, state):
-        return not state.contains("c")
+        return not state.contains("c")