Skip to content

Commit

Permalink
Add Policy Gradient methods
Browse files Browse the repository at this point in the history
  • Loading branch information
dennybritz committed Oct 2, 2016
1 parent 51047b9 commit ea5724c
Show file tree
Hide file tree
Showing 7 changed files with 1,141 additions and 45 deletions.
37 changes: 30 additions & 7 deletions DQN/Deep Q Learning Solution.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -123,16 +123,39 @@
" self.actions_pl = tf.placeholder(shape=[None], dtype=tf.int32, name=\"actions\")\n",
"\n",
" X = tf.to_float(self.X_pl) / 255.0\n",
" \n",
" # TODO: Implement the Tensorflow graph!\n",
" batch_size = tf.shape(self.X_pl)[0]\n",
" self.predictions = tf.zeros(shape=[batch_size, len(VALID_ACTIONS)])\n",
" self.loss = tf.constant(0.0)\n",
" self.train_op = tf.no_op(\"train_pp\")\n",
" \n",
"\n",
" # Three convolutional layers\n",
" conv1 = tf.contrib.layers.conv2d(\n",
" X, 32, 8, 4, activation_fn=tf.nn.relu)\n",
" conv2 = tf.contrib.layers.conv2d(\n",
" conv1, 64, 4, 2, activation_fn=tf.nn.relu)\n",
" conv3 = tf.contrib.layers.conv2d(\n",
" conv2, 64, 3, 1, activation_fn=tf.nn.relu)\n",
"\n",
" # Fully connected layers\n",
" flattened = tf.contrib.layers.flatten(conv3)\n",
" fc1 = tf.contrib.layers.fully_connected(flattened, 512)\n",
" self.predictions = tf.contrib.layers.fully_connected(fc1, len(VALID_ACTIONS))\n",
"\n",
" # Get the predictions for the chosen actions only\n",
" gather_indices = tf.range(batch_size) * tf.shape(self.predictions)[1] + self.actions_pl\n",
" self.action_predictions = tf.gather(tf.reshape(self.predictions, [-1]), gather_indices)\n",
"\n",
" # Calcualte the loss\n",
" self.losses = tf.squared_difference(self.y_pl, self.action_predictions)\n",
" self.loss = tf.reduce_mean(self.losses)\n",
"\n",
" # Optimizer Parameters from original paper\n",
" self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)\n",
" self.train_op = self.optimizer.minimize(self.loss, global_step=tf.contrib.framework.get_global_step())\n",
"\n",
" # Summaries for Tensorboard\n",
" self.summaries = tf.merge_summary([\n",
" tf.scalar_summary(\"loss\", self.loss)\n",
" tf.scalar_summary(\"loss\", self.loss),\n",
" tf.histogram_summary(\"loss_hist\", self.losses),\n",
" tf.histogram_summary(\"q_values_hist\", self.predictions),\n",
" tf.scalar_summary(\"max_q_value\", tf.reduce_max(self.predictions))\n",
" ])\n",
"\n",
"\n",
Expand Down
46 changes: 13 additions & 33 deletions FA/Q-Learning with Value Function Approximation Solution.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 130,
"execution_count": 1,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 131,
"execution_count": 2,
"metadata": {
"collapsed": false
},
Expand All @@ -39,7 +39,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"[2016-09-14 09:56:46,458] Making new env: MountainCar-v0\n"
"[2016-09-30 11:06:51,428] Making new env: MountainCar-v0\n"
]
}
],
Expand All @@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 132,
"execution_count": 3,
"metadata": {
"collapsed": false
},
Expand All @@ -62,7 +62,7 @@
" transformer_weights=None)"
]
},
"execution_count": 132,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -87,7 +87,7 @@
},
{
"cell_type": "code",
"execution_count": 133,
"execution_count": 4,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -150,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": 134,
"execution_count": 5,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -181,7 +181,7 @@
},
{
"cell_type": "code",
"execution_count": 141,
"execution_count": 13,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -218,7 +218,6 @@
" # Print out which episode we're on, useful for debugging.\n",
" # Also print reward for last episode\n",
" last_reward = stats.episode_rewards[i_episode - 1]\n",
" print(\"\\rEpisode {}/{} ({})\".format(i_episode + 1, num_episodes, last_reward), end=\"\")\n",
" sys.stdout.flush()\n",
" \n",
" # Reset the environment and pick the first action\n",
Expand Down Expand Up @@ -249,6 +248,8 @@
" \n",
" # Update the function approximator using our target\n",
" estimator.update(state, action, td_target)\n",
" \n",
" print(\"\\rStep {} @ Episode {}/{} ({})\".format(t, i_episode + 1, num_episodes, last_reward), end=\"\")\n",
" \n",
" if done:\n",
" break\n",
Expand All @@ -260,7 +261,7 @@
},
{
"cell_type": "code",
"execution_count": 142,
"execution_count": 14,
"metadata": {
"collapsed": true
},
Expand All @@ -271,7 +272,7 @@
},
{
"cell_type": "code",
"execution_count": 143,
"execution_count": 15,
"metadata": {
"collapsed": false
},
Expand All @@ -280,28 +281,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"3471 @ Episode 1/100 (0.0)"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-143-c805e2bdd5c2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# because our initial estimate for all states is too \"optimistic\" which leads\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# to the exploration of all states.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mstats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mq_learning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepsilon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-141-b9eb0d98a2f3>\u001b[0m in \u001b[0;36mq_learning\u001b[0;34m(env, estimator, num_episodes, discount_factor, epsilon, epsilon_decay)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;31m# TD Update\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0mq_values_next\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnext_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;31m# Q-Value TD Target\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-133-75a785916b93>\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, s, a)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \"\"\"\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mfeatures\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeaturize_state\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodels\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-133-75a785916b93>\u001b[0m in \u001b[0;36mfeaturize_state\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m 22\u001b[0m \"\"\"\n\u001b[1;32m 23\u001b[0m \u001b[0mscaled\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m \u001b[0mfeaturized\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeaturizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscaled\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 25\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mfeaturized\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mtransform\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 521\u001b[0m Xs = Parallel(n_jobs=self.n_jobs)(\n\u001b[1;32m 522\u001b[0m \u001b[0mdelayed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_transform_one\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrans\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransformer_weights\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 523\u001b[0;31m for name, trans in self.transformer_list)\n\u001b[0m\u001b[1;32m 524\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0missparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0mXs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtocsr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 798\u001b[0m \u001b[0;31m# was dispatched. In particular this covers the edge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 799\u001b[0m \u001b[0;31m# case of Parallel used with an exhausted iterator.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 800\u001b[0;31m \u001b[0;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 801\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 802\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m 651\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 652\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 653\u001b[0;31m \u001b[0mtasks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBatchedCalls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitertools\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mislice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 654\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtasks\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;31m# No more tasks available in the iterator: tell caller to stop.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, iterator_slice)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miterator_slice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator_slice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 521\u001b[0m Xs = Parallel(n_jobs=self.n_jobs)(\n\u001b[1;32m 522\u001b[0m \u001b[0mdelayed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_transform_one\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrans\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransformer_weights\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 523\u001b[0;31m for name, trans in self.transformer_list)\n\u001b[0m\u001b[1;32m 524\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0missparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0mXs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtocsr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dennybritz/venvs/tf/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mdelayed\u001b[0;34m(function, check_pickle)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 163\u001b[0;31m \u001b[0mdelayed_function\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunctools\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwraps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdelayed_function\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 164\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;34m\" functools.wraps fails on some callable objects \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dennybritz/homebrew/Cellar/python3/3.5.1/Frameworks/Python.framework/Versions/3.5/lib/python3.5/functools.py\u001b[0m in \u001b[0;36mupdate_wrapper\u001b[0;34m(wrapper, wrapped, assigned, updated)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0msetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mattr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mupdated\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwrapper\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwrapped\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
"Step 112 @ Episode 100/100 (-134.0)"
]
}
],
Expand Down
Loading

0 comments on commit ea5724c

Please sign in to comment.