diff --git a/DQN/Deep Q Learning Solution.ipynb b/DQN/Deep Q Learning Solution.ipynb
index a6f59d81a..a1f8f1a35 100644
--- a/DQN/Deep Q Learning Solution.ipynb
+++ b/DQN/Deep Q Learning Solution.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
@@ -27,26 +27,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[2016-09-26 12:51:41,737] Making new env: Breakout-v0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "env = gym.envs.make(\"Breakout-v0\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -58,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
@@ -92,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -182,21 +174,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[ 0.  0.  0.  0.]\n",
-      " [ 0.  0.  0.  0.]]\n",
-      "0.0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# For Testing....\n",
     "\n",
@@ -227,7 +209,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -257,7 +239,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
@@ -341,7 +323,7 @@
     "    Transition = namedtuple(\"Transition\", [\"state\", \"action\", \"reward\", \"next_state\", \"done\"])\n",
     "\n",
     "    # The replay memory\n",
-    "    replay_memory = deque(maxlen=replay_memory_size)\n",
+    "    replay_memory = []\n",
     "\n",
     "    # Keeps track of useful statistics\n",
     "    stats = plotting.EpisodeStats(\n",
diff --git a/DQN/Deep Q Learning.ipynb b/DQN/Deep Q Learning.ipynb
index a203573fe..d2a295cf1 100644
--- a/DQN/Deep Q Learning.ipynb
+++ b/DQN/Deep Q Learning.ipynb
@@ -346,7 +346,7 @@
     "    Transition = namedtuple(\"Transition\", [\"state\", \"action\", \"reward\", \"next_state\", \"done\"])\n",
     "\n",
     "    # The replay memory\n",
-    "    replay_memory = deque(maxlen=replay_memory_size)\n",
+    "    replay_memory = []\n",
     "\n",
     "    # Keeps track of useful statistics\n",
     "    stats = plotting.EpisodeStats(\n",
diff --git a/DQN/Double DQN Solution.ipynb b/DQN/Double DQN Solution.ipynb
index c372b458c..122c4dd3f 100644
--- a/DQN/Double DQN Solution.ipynb
+++ b/DQN/Double DQN Solution.ipynb
@@ -323,7 +323,7 @@
     "    Transition = namedtuple(\"Transition\", [\"state\", \"action\", \"reward\", \"next_state\", \"done\"])\n",
     "\n",
     "    # The replay memory\n",
-    "    replay_memory = deque(maxlen=replay_memory_size)\n",
+    "    replay_memory = []\n",
     "\n",
     "    # Keeps track of useful statistics\n",
     "    stats = plotting.EpisodeStats(\n",
@@ -503,7 +503,7 @@
     "    experiment_dir=experiment_dir,\n",
     "    num_episodes=10000,\n",
     "    replay_memory_size=500000,\n",
-    "    replay_memory_init_size=50000,\n",
+    "    replay_memory_init_size=500,\n",
     "    update_target_estimator_every=10000,\n",
     "    epsilon_start=1.0,\n",
     "    epsilon_end=0.1,\n",
diff --git a/DQN/dqn.py b/DQN/dqn.py
index 9596778ae..4bead3bcf 100755
--- a/DQN/dqn.py
+++ b/DQN/dqn.py
@@ -238,7 +238,7 @@ def deep_q_learning(sess,
     Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])
 
     # The replay memory
-    replay_memory = deque(maxlen=replay_memory_size)
+    replay_memory = []
 
     # Keeps track of useful statistics
     stats = plotting.EpisodeStats(
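The hunks above swap the deque-backed replay memory for a plain Python list. The corresponding eviction and sampling code is outside the context shown here, so the sketch below is only an illustration of how such a list is typically kept bounded; the store/sample helpers are hypothetical names, while replay_memory, replay_memory_size, and Transition mirror the diff.

    # Minimal sketch of a capped, list-based replay memory (assumed pattern, not the diff's exact code).
    import random
    from collections import namedtuple

    Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

    replay_memory_size = 500000
    replay_memory = []

    def store(transition):
        # Drop the oldest transition once the buffer is full, replicating deque's maxlen behavior.
        if len(replay_memory) >= replay_memory_size:
            replay_memory.pop(0)
        replay_memory.append(transition)

    def sample(batch_size):
        # Uniform random minibatch; random.sample over a list avoids the O(n)
        # cost of indexing into a large deque for each sampled element.
        return random.sample(replay_memory, batch_size)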