From 80c22efc6294af1d2adad564be9cd924b26f46a5 Mon Sep 17 00:00:00 2001
From: Matteo Vaccari <vaccari@pobox.com>
Date: Fri, 27 Oct 2023 08:59:56 +0200
Subject: [PATCH] Now adding samples when explore is chosen

---
 src/ai/macro_command_sampling.js       | 16 +++++++
 src/ai/mcts_player.js                  | 10 ++++-
 src/model/commands/macro_command.js    | 11 +++++
 test/ai/macro_command_sampling_test.js | 61 ++++++++++++++++++++++----
 4 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/src/ai/macro_command_sampling.js b/src/ai/macro_command_sampling.js
index d227140..edb5b33 100644
--- a/src/ai/macro_command_sampling.js
+++ b/src/ai/macro_command_sampling.js
@@ -59,3 +59,19 @@ function groupByFromHex(availableCommands) {
     return groups;
 }
 
+/**
+ * Derives a new sample from an existing one by swapping one of its commands for another available command.
+ * @param {MoveCommand[]} availableCommands candidate commands; entries without a fromHex are ignored
+ * @param {MacroCommand} existingSample the sample to derive from; it is not modified
+ * @returns {MacroCommand} a new sample, or existingSample itself when no candidate qualifies
+ */
+export function perturbSample(availableCommands, existingSample) {
+    // first candidate that starts from a hex the sample already moves from and ends on a hex the sample does not target
+    const replacement = availableCommands.find((command) =>
+        command.fromHex && existingSample.hasFromHex(command.fromHex) && !existingSample.hasToHex(command.toHex));
+    if (!replacement) return existingSample; // find() yields undefined when nothing qualifies; do not dereference it
+    const newCommands = existingSample.commands.slice(); // work on a copy so the existing sample stays untouched
+    newCommands[existingSample.indexOfFromHex(replacement.fromHex)] = replacement;
+    return new MacroCommand(newCommands);
+}
+
diff --git a/src/ai/mcts_player.js b/src/ai/mcts_player.js
index cab3f72..60b52c4 100644
--- a/src/ai/mcts_player.js
+++ b/src/ai/mcts_player.js
@@ -1,4 +1,5 @@
-import { sample } from "./macro_command_sampling.js";
+import { MacroCommand } from "model/commands/macro_command.js";
+import { perturbSample, sample } from "./macro_command_sampling.js";
 import { MoveCommand } from "../model/commands/move_command.js";
 import { randomElement, randomShuffleArray } from "../lib/random.js";
 import { attackProximityScoreForHex, scoreGreedy, scoreMcts } from "./score.js";
@@ -178,6 +179,13 @@ export class DecisionNode extends TreeNode {
                 bestScore = currentScore;
             }
         }
+        if (best.command instanceof MacroCommand && bestScore < expansionFactor * Math.sqrt(logOfThisVisits / 1)) {
+            const macroCommand = perturbSample(this.game.validCommands(), best.command);
+            const clone = executeCommand(this.game, macroCommand);
+            const childNode = new DecisionNode(clone, this, 0, 0, [], macroCommand);
+            this.children.push(childNode);
+            return childNode;
+        }
         return best;
     }
 
diff --git a/src/model/commands/macro_command.js b/src/model/commands/macro_command.js
index e420f38..4d853c2 100644
--- a/src/model/commands/macro_command.js
+++ b/src/model/commands/macro_command.js
@@ -27,5 +27,16 @@ export class MacroCommand extends Command {
         return events;
     }
 
+    hasFromHex(hex) { // true when at least one sub-command's fromHex is strictly equal (===) to hex
+        return this.commands.some(({ fromHex }) => fromHex === hex);
+    }
+
+    hasToHex(hex) { // true when at least one sub-command's toHex is strictly equal (===) to hex
+        return this.commands.some(({ toHex }) => toHex === hex);
+    }
+
+    indexOfFromHex(hex) { // position in this.commands of the first command whose fromHex === hex, or -1 if none
+        return this.commands.findIndex(({ fromHex }) => fromHex === hex);
+    }
 }
 
diff --git a/test/ai/macro_command_sampling_test.js b/test/ai/macro_command_sampling_test.js
index 38133e2..b50953b 100644
--- a/test/ai/macro_command_sampling_test.js
+++ b/test/ai/macro_command_sampling_test.js
@@ -1,18 +1,19 @@
-import { sample } from "ai/macro_command_sampling.js";
+import { perturbSample, sample } from "ai/macro_command_sampling.js";
 import { EndPhaseCommand } from "model/commands/end_phase_command.js";
 import { MacroCommand } from "model/commands/macro_command.js";
 import { MoveCommand } from "model/commands/move_command.js";
 import { hexOf } from "xlib/hexlib.js";
+import { fixedRandom, resetFixedRandom } from "xlib/random.js";
 
 
-/**
- * @param {Hex} hex
- */
-function scoreFunction(hex) {
-    return hex.q + 10 * hex.r;
-}
-
 describe('construct the best move for each unit individually', () => {
+    /**
+     * Scores a hex as q + 10 * r.
+     * @param {Hex} hex - the hex to score
+     * @returns {number} hex.q + 10 * hex.r
+     */
+    const scoreFunction = (hex) => hex.q + 10 * hex.r;
+
     test('just one command', () => {
         const availableMoves = [
             new MoveCommand(hexOf(1, 4), hexOf(1, 5)),
@@ -110,3 +111,47 @@ describe('construct the best move for each unit individually', () => {
         ]).toString());
     });
 });
+
+// Tests for perturbSample; Math.random is replaced by the deterministic fixedRandom around every test.
+describe('deriving a sample from another sample', () => {
+    const originalRandom = Math.random;
+    beforeEach(() => {
+        resetFixedRandom();
+        Math.random = fixedRandom;
+    });
+
+    afterEach(() => {
+        Math.random = originalRandom;
+    });
+
+    const availableCommands = [
+        new MoveCommand(hexOf(1, 1), hexOf(0, 0)),
+        new MoveCommand(hexOf(3, 3), hexOf(0, 0)),
+        new EndPhaseCommand(),
+    ];
+    const existingSample = new MacroCommand([
+        new MoveCommand(hexOf(1, 1), hexOf(0, 0)),
+        new MoveCommand(hexOf(4, 4), hexOf(10, 10)),
+    ]);
+    const existingSampleAsString = existingSample.toString();
+
+    // Deferred on purpose: a describe body runs while tests are being collected, i.e. before beforeEach has
+    // installed fixedRandom, and an exception thrown there is not attributed to any single test.
+    const derive = () => perturbSample(availableCommands, existingSample);
+    // NOTE(review): perturbSample looks for a command whose fromHex is in the sample and whose toHex is not;
+    // neither MoveCommand in availableCommands qualifies — confirm the fixture and the expected value below.
+    test('changes one unit movement', () => {
+        expect(derive().toString()).toEqual(new MacroCommand([
+            new MoveCommand(hexOf(3, 3), hexOf(0, 0)),
+            new MoveCommand(hexOf(4, 4), hexOf(10, 10)),
+        ]).toString());
+    });
+
+    test('does not change the existing sample', () => {
+        derive();
+        expect(existingSample.toString()).toEqual(existingSampleAsString);
+    });
+
+    test.todo('chooses unit at random');
+    test.todo('chooses move at random');
+});