Use the epsilon-greedy algorithm to decide whether macro-move sampling explores or exploits

xpmatteo · Oct 27, 2023 · 7376d5f · 7376d5f
1 parent 5b4563e
commit 7376d5f
Showing 5 changed files with 80 additions and 1,817 deletions.
diff --git a/README.md b/README.md
@@ -50,8 +50,8 @@ image to appear.  The parameters are:
 
 # TODO AI 
 
-- macro-move sampling: tune the explore/exploit ratio
 - macro-move sampling: avoid duplicate macro-moves in a node's children
+- macro-move sampling: tune the explore/exploit ratio
 - open loop instead of chance nodes
 - implement evasion and line-of-sight rules for ranged combat, to allow the realistic game strategy to emerge
 - make end phase automatic

diff --git a/src/ai/mcts_player.js b/src/ai/mcts_player.js
@@ -165,10 +165,17 @@ export class DecisionNode extends TreeNode {
     /*
         This decides which is the most likely command to explore during tree search
      */
-    bestUctChild(expansionFactor = DEFAULT_EXPANSION_FACTOR) {
+    bestUctChild(expansionFactor = DEFAULT_EXPANSION_FACTOR, samplingExplorationChance = 0.01) {
         let best = undefined;
         let bestScore = -Infinity;
         randomShuffleArray(this.children);
+        if (this.children[0].command instanceof MacroCommand && Math.random() < samplingExplorationChance) {
+            const macroCommand = perturbSample(this.game.validCommands(), this.children[0].command);
+            const clone = executeCommand(this.game, macroCommand);
+            const childNode = new DecisionNode(clone, this, 0, 0, [], macroCommand);
+            this.children.push(childNode);
+            return childNode;
+        }
         const logOfThisVisits = Math.log(this.visits);
         for (let child of this.children) {
             const factor = (child.game.currentSide === this.game.currentSide) ? 1 : -1;
@@ -179,13 +186,6 @@ export class DecisionNode extends TreeNode {
                 bestScore = currentScore;
             }
         }
-        if (best.command instanceof MacroCommand && bestScore < expansionFactor * Math.sqrt(logOfThisVisits / 10)) {
-            const macroCommand = perturbSample(this.game.validCommands(), best.command);
-            const clone = executeCommand(this.game, macroCommand);
-            const childNode = new DecisionNode(clone, this, 0, 0, [], macroCommand);
-            this.children.push(childNode);
-            return childNode;
-        }
         return best;
     }
 
@@ -316,6 +316,7 @@ export class MctsPlayer {
         this.args.iterations = this.args.iterations || 1000;
         this.args.playoutIterations = this.args.playoutIterations || 20;
         this.args.logfunction = this.args.logfunction || console.log;
+        this.args.samplingExplorationChance = this.args.samplingExplorationChance || 0.01;
         this.args.note = this.args.note || "";
     }
 
@@ -392,15 +393,15 @@ export class MctsPlayer {
             if (node.children.length === 0) {
                 return node.expand();
             } else {
-                node = node.bestUctChild(this.args.expansionFactor);
+                node = node.bestUctChild(this.args.expansionFactor, this.args.samplingExplorationChance);
             }
         }
         // node is terminal
         return [node];
     }
 
     toString() {
-        return `MctsPlayer(${this.args.iterations}, ${this.args.playoutIterations}, ${this.args.note}))`;
+        return `MctsPlayer(${this.args.iterations}, ${this.args.playoutIterations}, ${this.args.samplingExplorationChance}, ${this.args.note}))`;
     }
 }
 

diff --git a/src/config.js b/src/config.js
@@ -1,7 +1,8 @@
 
 export const MCTS_EXPANSION_FACTOR = 1.4142;
-export const MCTS_ITERATIONS = 20000;
+export const MCTS_ITERATIONS = 40000;
 export const MCTS_PLAYOUT_ITERATIONS = 40;
+export const MCTS_SAMPLING_EXPLORATION_CHANCE = 0.01;
 
 import { Point } from "./lib/hexlib.js";
 export const CARD_IMAGE_SIZE = new Point(400, 560);

diff --git a/src/main.js b/src/main.js
@@ -1,4 +1,4 @@
-import { CARD_IMAGE_SIZE, MCTS_PLAYOUT_ITERATIONS } from "./config.js";
+import { CARD_IMAGE_SIZE, MCTS_PLAYOUT_ITERATIONS, MCTS_SAMPLING_EXPLORATION_CHANCE } from "./config.js";
 import { MCTS_EXPANSION_FACTOR, MCTS_ITERATIONS } from "./config.js";
 import { Autoplay, displayEvents } from "./ai/autoplay.js";
 import { MctsPlayer } from "./ai/mcts_player.js";
@@ -27,6 +27,7 @@ const aiPlayer = new MctsPlayer({
     iterations: MCTS_ITERATIONS,
     expansionFactor: MCTS_EXPANSION_FACTOR,
     playoutIterations: MCTS_PLAYOUT_ITERATIONS,
+    samplingExplorationChance: MCTS_SAMPLING_EXPLORATION_CHANCE,
 });
 
 let game;