Add Policy Gradient methods

devjmc · Oct 2, 2016 · ea5724c · ea5724c
1 parent 51047b9
commit ea5724c
Show file tree

Hide file tree

Showing 7 changed files with 1,141 additions and 45 deletions.
diff --git a/DQN/Deep Q Learning Solution.ipynb b/DQN/Deep Q Learning Solution.ipynb
@@ -123,16 +123,39 @@
     "        self.actions_pl = tf.placeholder(shape=[None], dtype=tf.int32, name=\"actions\")\n",
     "\n",
     "        X = tf.to_float(self.X_pl) / 255.0\n",
-    "        \n",
-    "        # TODO: Implement the Tensorflow graph!\n",
     "        batch_size = tf.shape(self.X_pl)[0]\n",
-    "        self.predictions = tf.zeros(shape=[batch_size, len(VALID_ACTIONS)])\n",
-    "        self.loss = tf.constant(0.0)\n",
-    "        self.train_op = tf.no_op(\"train_pp\")\n",
-    "        \n",
+    "\n",
+    "        # Three convolutional layers\n",
+    "        conv1 = tf.contrib.layers.conv2d(\n",
+    "            X, 32, 8, 4, activation_fn=tf.nn.relu)\n",
+    "        conv2 = tf.contrib.layers.conv2d(\n",
+    "            conv1, 64, 4, 2, activation_fn=tf.nn.relu)\n",
+    "        conv3 = tf.contrib.layers.conv2d(\n",
+    "            conv2, 64, 3, 1, activation_fn=tf.nn.relu)\n",
+    "\n",
+    "        # Fully connected layers\n",
+    "        flattened = tf.contrib.layers.flatten(conv3)\n",
+    "        fc1 = tf.contrib.layers.fully_connected(flattened, 512)\n",
+    "        self.predictions = tf.contrib.layers.fully_connected(fc1, len(VALID_ACTIONS))\n",
+    "\n",
+    "        # Get the predictions for the chosen actions only\n",
+    "        gather_indices = tf.range(batch_size) * tf.shape(self.predictions)[1] + self.actions_pl\n",
+    "        self.action_predictions = tf.gather(tf.reshape(self.predictions, [-1]), gather_indices)\n",
+    "\n",
+    "        # Calcualte the loss\n",
+    "        self.losses = tf.squared_difference(self.y_pl, self.action_predictions)\n",
+    "        self.loss = tf.reduce_mean(self.losses)\n",
+    "\n",
+    "        # Optimizer Parameters from original paper\n",
+    "        self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)\n",
+    "        self.train_op = self.optimizer.minimize(self.loss, global_step=tf.contrib.framework.get_global_step())\n",
+    "\n",
     "        # Summaries for Tensorboard\n",
     "        self.summaries = tf.merge_summary([\n",
-    "            tf.scalar_summary(\"loss\", self.loss)\n",
+    "            tf.scalar_summary(\"loss\", self.loss),\n",
+    "            tf.histogram_summary(\"loss_hist\", self.losses),\n",
+    "            tf.histogram_summary(\"q_values_hist\", self.predictions),\n",
+    "            tf.scalar_summary(\"max_q_value\", tf.reduce_max(self.predictions))\n",
     "        ])\n",
     "\n",
     "\n",

diff --git a/FA/Q-Learning with Value Function Approximation Solution.ipynb b/FA/Q-Learning with Value Function Approximation Solution.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 130,
+   "execution_count": 1,
    "metadata": {
     "collapsed": false
    },
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 131,
+   "execution_count": 2,
    "metadata": {
     "collapsed": false
    },
@@ -39,7 +39,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2016-09-14 09:56:46,458] Making new env: MountainCar-v0\n"
+      "[2016-09-30 11:06:51,428] Making new env: MountainCar-v0\n"
      ]
     }
    ],
@@ -49,7 +49,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 132,
+   "execution_count": 3,
    "metadata": {
     "collapsed": false
    },
@@ -62,7 +62,7 @@
        "       transformer_weights=None)"
       ]
      },
-     "execution_count": 132,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -87,7 +87,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 133,
+   "execution_count": 4,
    "metadata": {
     "collapsed": false
    },
@@ -150,7 +150,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 134,
+   "execution_count": 5,
    "metadata": {
     "collapsed": false
    },
@@ -181,7 +181,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 141,
+   "execution_count": 13,
    "metadata": {
     "collapsed": false
    },
@@ -218,7 +218,6 @@
     "        # Print out which episode we're on, useful for debugging.\n",
     "        # Also print reward for last episode\n",
     "        last_reward = stats.episode_rewards[i_episode - 1]\n",
-    "        print(\"\\rEpisode {}/{} ({})\".format(i_episode + 1, num_episodes, last_reward), end=\"\")\n",
     "        sys.stdout.flush()\n",
     "        \n",
     "        # Reset the environment and pick the first action\n",
@@ -249,6 +248,8 @@
     "            \n",
     "            # Update the function approximator using our target\n",
     "            estimator.update(state, action, td_target)\n",
+    "            \n",
+    "            print(\"\\rStep {} @ Episode {}/{} ({})\".format(t, i_episode + 1, num_episodes, last_reward), end=\"\")\n",
     "                \n",
     "            if done:\n",
     "                break\n",
@@ -260,7 +261,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 142,
+   "execution_count": 14,
    "metadata": {
     "collapsed": true
    },
@@ -271,7 +272,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 143,
+   "execution_count": 15,
    "metadata": {
     "collapsed": false
    },
@@ -280,28 +281,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "3471 @ Episode 1/100 (0.0)"
-     ]
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-143-c805e2bdd5c2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;31m# because our initial estimate for all states is too \"optimistic\" which leads\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;31m# to the exploration of all states.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mstats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mq_learning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepsilon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m<ipython-input-141-b9eb0d98a2f3>\u001b[0m in \u001b[0;36mq_learning\u001b[0;34m(env, estimator, num_episodes, discount_factor, epsilon, epsilon_decay)\u001b[0m\n\u001b[1;32m     50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     51\u001b[0m             \u001b[0;31m# TD Update\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m             \u001b[0mq_values_next\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnext_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     54\u001b[0m             \u001b[0;31m# Q-Value TD Target\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m<ipython-input-133-75a785916b93>\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, s, a)\u001b[0m\n\u001b[1;32m     39\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     40\u001b[0m         \"\"\"\n\u001b[0;32m---> 41\u001b[0;31m         \u001b[0mfeatures\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeaturize_state\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     42\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     43\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodels\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m<ipython-input-133-75a785916b93>\u001b[0m in \u001b[0;36mfeaturize_state\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m     22\u001b[0m         \"\"\"\n\u001b[1;32m     23\u001b[0m         \u001b[0mscaled\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m         \u001b[0mfeaturized\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeaturizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscaled\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     25\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mfeaturized\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mtransform\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    521\u001b[0m         Xs = Parallel(n_jobs=self.n_jobs)(\n\u001b[1;32m    522\u001b[0m             \u001b[0mdelayed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_transform_one\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrans\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransformer_weights\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 523\u001b[0;31m             for name, trans in self.transformer_list)\n\u001b[0m\u001b[1;32m    524\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0missparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    525\u001b[0m             \u001b[0mXs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtocsr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m    798\u001b[0m             \u001b[0;31m# was dispatched. In particular this covers the edge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    799\u001b[0m             \u001b[0;31m# case of Parallel used with an exhausted iterator.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 800\u001b[0;31m             \u001b[0;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    801\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    802\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m    651\u001b[0m             \u001b[0mbatch_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    652\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 653\u001b[0;31m             \u001b[0mtasks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBatchedCalls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitertools\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mislice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    654\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtasks\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    655\u001b[0m                 \u001b[0;31m# No more tasks available in the iterator: tell caller to stop.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, iterator_slice)\u001b[0m\n\u001b[1;32m     66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     67\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miterator_slice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator_slice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     69\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    521\u001b[0m         Xs = Parallel(n_jobs=self.n_jobs)(\n\u001b[1;32m    522\u001b[0m             \u001b[0mdelayed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_transform_one\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrans\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransformer_weights\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 523\u001b[0;31m             for name, trans in self.transformer_list)\n\u001b[0m\u001b[1;32m    524\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0missparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    525\u001b[0m             \u001b[0mXs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtocsr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mdelayed\u001b[0;34m(function, check_pickle)\u001b[0m\n\u001b[1;32m    161\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    162\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 163\u001b[0;31m         \u001b[0mdelayed_function\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunctools\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwraps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdelayed_function\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    164\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    165\u001b[0m         \u001b[0;34m\" functools.wraps fails on some callable objects \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/Users/dennybritz/homebrew/Cellar/python3/3.5.1/Frameworks/Python.framework/Versions/3.5/lib/python3.5/functools.py\u001b[0m in \u001b[0;36mupdate_wrapper\u001b[0;34m(wrapper, wrapped, assigned, updated)\u001b[0m\n\u001b[1;32m     62\u001b[0m             \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     63\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m             \u001b[0msetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     65\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mattr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mupdated\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     66\u001b[0m         \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwrapped\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+      "Step 112 @ Episode 100/100 (-134.0)"
      ]
     }
    ],