Skip to content

Commit

Permalink
Use the epsilon-greedy algorithm to decide whether macro-move sampling explores or exploits
Browse files Browse the repository at this point in the history
  • Loading branch information
xpmatteo committed Oct 27, 2023
1 parent 5b4563e commit 7376d5f
Showing 5 changed files with 80 additions and 1,817 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -50,8 +50,8 @@ image to appear. The parameters are:

# TODO AI

- macro-move sampling: tune the explore/exploit ratio
- macro-move sampling: avoid duplicate macro-moves in a node's children
- macro-move sampling: tune the explore/exploit ratio
- open loop instead of chance nodes
- implement evasion and line-of-sight rules for ranged combat, so that a realistic game strategy can emerge
- make end phase automatic
21 changes: 11 additions & 10 deletions src/ai/mcts_player.js
Original file line number Diff line number Diff line change
@@ -165,10 +165,17 @@ export class DecisionNode extends TreeNode {
/*
Chooses which child node (and therefore which command) to explore next during tree search.
*/
bestUctChild(expansionFactor = DEFAULT_EXPANSION_FACTOR) {
bestUctChild(expansionFactor = DEFAULT_EXPANSION_FACTOR, samplingExplorationChance = 0.01) {
let best = undefined;
let bestScore = -Infinity;
randomShuffleArray(this.children);
if (this.children[0].command instanceof MacroCommand && Math.random() < samplingExplorationChance) {
const macroCommand = perturbSample(this.game.validCommands(), this.children[0].command);
const clone = executeCommand(this.game, macroCommand);
const childNode = new DecisionNode(clone, this, 0, 0, [], macroCommand);
this.children.push(childNode);
return childNode;
}
const logOfThisVisits = Math.log(this.visits);
for (let child of this.children) {
const factor = (child.game.currentSide === this.game.currentSide) ? 1 : -1;
@@ -179,13 +186,6 @@ export class DecisionNode extends TreeNode {
bestScore = currentScore;
}
}
if (best.command instanceof MacroCommand && bestScore < expansionFactor * Math.sqrt(logOfThisVisits / 10)) {
const macroCommand = perturbSample(this.game.validCommands(), best.command);
const clone = executeCommand(this.game, macroCommand);
const childNode = new DecisionNode(clone, this, 0, 0, [], macroCommand);
this.children.push(childNode);
return childNode;
}
return best;
}

@@ -316,6 +316,7 @@ export class MctsPlayer {
this.args.iterations = this.args.iterations || 1000;
this.args.playoutIterations = this.args.playoutIterations || 20;
this.args.logfunction = this.args.logfunction || console.log;
this.args.samplingExplorationChance = this.args.samplingExplorationChance || 0.01;
this.args.note = this.args.note || "";
}

@@ -392,15 +393,15 @@ export class MctsPlayer {
if (node.children.length === 0) {
return node.expand();
} else {
node = node.bestUctChild(this.args.expansionFactor);
node = node.bestUctChild(this.args.expansionFactor, this.args.samplingExplorationChance);
}
}
// node is terminal
return [node];
}

toString() {
return `MctsPlayer(${this.args.iterations}, ${this.args.playoutIterations}, ${this.args.note}))`;
return `MctsPlayer(${this.args.iterations}, ${this.args.playoutIterations}, ${this.args.samplingExplorationChance}, ${this.args.note}))`;
}
}

3 changes: 2 additions & 1 deletion src/config.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@

export const MCTS_EXPANSION_FACTOR = 1.4142;
export const MCTS_ITERATIONS = 20000;
export const MCTS_ITERATIONS = 40000;
export const MCTS_PLAYOUT_ITERATIONS = 40;
export const MCTS_SAMPLING_EXPLORATION_CHANCE = 0.01;

import { Point } from "./lib/hexlib.js";
export const CARD_IMAGE_SIZE = new Point(400, 560);
3 changes: 2 additions & 1 deletion src/main.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { CARD_IMAGE_SIZE, MCTS_PLAYOUT_ITERATIONS } from "./config.js";
import { CARD_IMAGE_SIZE, MCTS_PLAYOUT_ITERATIONS, MCTS_SAMPLING_EXPLORATION_CHANCE } from "./config.js";
import { MCTS_EXPANSION_FACTOR, MCTS_ITERATIONS } from "./config.js";
import { Autoplay, displayEvents } from "./ai/autoplay.js";
import { MctsPlayer } from "./ai/mcts_player.js";
@@ -27,6 +27,7 @@ const aiPlayer = new MctsPlayer({
iterations: MCTS_ITERATIONS,
expansionFactor: MCTS_EXPANSION_FACTOR,
playoutIterations: MCTS_PLAYOUT_ITERATIONS,
samplingExplorationChance: MCTS_SAMPLING_EXPLORATION_CHANCE,
});

let game;
Loading

0 comments on commit 7376d5f

Please sign in to comment.