Skip to content

Commit

Permalink
Fix replay memory
Browse files — browse the repository at this point in the history
  • Branch information:
dennybritz committed Sep 28, 2016
1 parent 8a81815 commit 1b95e24
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 33 deletions.
40 changes: 11 additions & 29 deletions DQN/Deep Q Learning Solution.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {
"collapsed": true
},
Expand All @@ -27,26 +27,18 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2016-09-26 12:51:41,737] Making new env: Breakout-v0\n"
]
}
],
"outputs": [],
"source": [
"env = gym.envs.make(\"Breakout-v0\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {
"collapsed": false
},
Expand All @@ -58,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {
"collapsed": true
},
Expand Down Expand Up @@ -92,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -182,21 +174,11 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0. 0. 0. 0.]\n",
" [ 0. 0. 0. 0.]]\n",
"0.0\n"
]
}
],
"outputs": [],
"source": [
"# For Testing....\n",
"\n",
Expand Down Expand Up @@ -227,7 +209,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -257,7 +239,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"metadata": {
"collapsed": true
},
Expand Down Expand Up @@ -341,7 +323,7 @@
" Transition = namedtuple(\"Transition\", [\"state\", \"action\", \"reward\", \"next_state\", \"done\"])\n",
"\n",
" # The replay memory\n",
" replay_memory = deque(maxlen=replay_memory_size)\n",
" replay_memory = []\n",
"\n",
" # Keeps track of useful statistics\n",
" stats = plotting.EpisodeStats(\n",
Expand Down
2 changes: 1 addition & 1 deletion DQN/Deep Q Learning.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@
" Transition = namedtuple(\"Transition\", [\"state\", \"action\", \"reward\", \"next_state\", \"done\"])\n",
"\n",
" # The replay memory\n",
" replay_memory = deque(maxlen=replay_memory_size)\n",
" replay_memory = []\n",
"\n",
" # Keeps track of useful statistics\n",
" stats = plotting.EpisodeStats(\n",
Expand Down
4 changes: 2 additions & 2 deletions DQN/Double DQN Solution.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@
" Transition = namedtuple(\"Transition\", [\"state\", \"action\", \"reward\", \"next_state\", \"done\"])\n",
"\n",
" # The replay memory\n",
" replay_memory = deque(maxlen=replay_memory_size)\n",
" replay_memory = []\n",
"\n",
" # Keeps track of useful statistics\n",
" stats = plotting.EpisodeStats(\n",
Expand Down Expand Up @@ -503,7 +503,7 @@
" experiment_dir=experiment_dir,\n",
" num_episodes=10000,\n",
" replay_memory_size=500000,\n",
" replay_memory_init_size=50000,\n",
" replay_memory_init_size=500,\n",
" update_target_estimator_every=10000,\n",
" epsilon_start=1.0,\n",
" epsilon_end=0.1,\n",
Expand Down
2 changes: 1 addition & 1 deletion DQN/dqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def deep_q_learning(sess,
Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

# The replay memory
replay_memory = deque(maxlen=replay_memory_size)
replay_memory = []

# Keeps track of useful statistics
stats = plotting.EpisodeStats(
Expand Down

0 comments on commit 1b95e24

Please sign in to comment.