
Commit

Fix comments - Fixes dennybritz#18
dennybritz committed Nov 1, 2016
1 parent 800bfa5 commit fd07768
Showing 3 changed files with 16 additions and 18 deletions.
15 changes: 7 additions & 8 deletions PolicyGradient/CliffWalk Actor Critic Solution.ipynb
@@ -139,16 +139,15 @@
"source": [
"def actor_critic(env, estimator_policy, estimator_value, num_episodes, discount_factor=1.0):\n",
" \"\"\"\n",
" Q-Learning algorithm for fff-policy TD control using Function Approximation.\n",
" Finds the optimal greedy policy while following an epsilon-greedy policy.\n",
" Actor Critic Algorithm. Optimizes the policy \n",
" function approximator using policy gradient.\n",
" \n",
" Args:\n",
" env: OpenAI environment.\n",
" estimator: Action-Value function estimator\n",
" num_episodes: Number of episodes to run for.\n",
" discount_factor: Lambda time discount factor.\n",
" epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
" epsilon_decay: Each episode, epsilon is decayed by this factor\n",
" estimator_policy: Policy Function to be optimized \n",
" estimator_value: Value function approximator, used as a baseline\n",
" num_episodes: Number of episodes to run for\n",
" discount_factor: Time-discount factor\n",
" \n",
" Returns:\n",
" An EpisodeStats object with two numpy arrays for episode_lengths and episode_rewards.\n",
@@ -307,7 +306,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
"version": "3.5.0"
}
},
"nbformat": 4,
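For reference, a minimal sketch of the one-step actor-critic loop that the updated docstring describes: the critic (estimator_value) supplies a TD-error baseline and the actor (estimator_policy) takes a policy-gradient step weighted by it. The predict/update method names and the 4-tuple return of env.step are assumptions for illustration, not taken from the notebook itself.

# Sketch of one-step actor-critic; estimator interfaces are assumed, not the notebook's actual API.
import itertools
import numpy as np

def actor_critic(env, estimator_policy, estimator_value, num_episodes, discount_factor=1.0):
    for i_episode in range(num_episodes):
        state = env.reset()
        for t in itertools.count():
            # Sample an action from the current policy (assumed predict method).
            action_probs = estimator_policy.predict(state)
            action = np.random.choice(len(action_probs), p=action_probs)
            next_state, reward, done, _ = env.step(action)

            # Critic: TD(0) target and error, using the value estimate as a baseline.
            td_target = reward + discount_factor * estimator_value.predict(next_state)
            td_error = td_target - estimator_value.predict(state)

            # Update the value function toward the TD target (assumed update method).
            estimator_value.update(state, td_target)
            # Actor: policy-gradient step weighted by the TD error (assumed update method).
            estimator_policy.update(state, td_error, action)

            if done:
                break
            state = next_state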
@@ -139,7 +139,7 @@
"source": [
"def reinforce(env, estimator_policy, estimator_value, num_episodes, discount_factor=1.0):\n",
" \"\"\"\n",
" REINFORCE (Monte Carlo Policy Gradient) Algorotihm. Optimizes the policy\n",
" REINFORCE (Monte Carlo Policy Gradient) Algorithm. Optimizes the policy\n",
" function approximator using policy gradient.\n",
" \n",
" Args:\n",
@@ -316,7 +316,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
"version": "3.5.0"
}
},
"nbformat": 4,
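Similarly, a minimal sketch of REINFORCE (Monte Carlo policy gradient) with a value-function baseline, matching the signature shown in the diff above; the estimator predict/update methods are again assumptions for illustration only.

# Sketch of REINFORCE with a value baseline; estimator interfaces are assumed.
import collections
import itertools
import numpy as np

Transition = collections.namedtuple("Transition", ["state", "action", "reward"])

def reinforce(env, estimator_policy, estimator_value, num_episodes, discount_factor=1.0):
    for i_episode in range(num_episodes):
        # Generate a full episode by following the current policy.
        episode = []
        state = env.reset()
        for t in itertools.count():
            action_probs = estimator_policy.predict(state)
            action = np.random.choice(len(action_probs), p=action_probs)
            next_state, reward, done, _ = env.step(action)
            episode.append(Transition(state, action, reward))
            if done:
                break
            state = next_state

        # Monte Carlo update: discounted return from each step onward,
        # minus the value baseline, serves as the advantage.
        for t, transition in enumerate(episode):
            total_return = sum(discount_factor ** k * s.reward
                               for k, s in enumerate(episode[t:]))
            advantage = total_return - estimator_value.predict(transition.state)
            estimator_value.update(transition.state, total_return)
            estimator_policy.update(transition.state, advantage, transition.action)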
@@ -232,16 +232,15 @@
"source": [
"def actor_critic(env, estimator_policy, estimator_value, num_episodes, discount_factor=1.0):\n",
" \"\"\"\n",
" Q-Learning algorithm for fff-policy TD control using Function Approximation.\n",
" Finds the optimal greedy policy while following an epsilon-greedy policy.\n",
" Actor Critic Algorithm. Optimizes the policy \n",
" function approximator using policy gradient.\n",
" \n",
" Args:\n",
" env: OpenAI environment.\n",
" estimator: Action-Value function estimator\n",
" num_episodes: Number of episodes to run for.\n",
" discount_factor: Lambda time discount factor.\n",
" epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
" epsilon_decay: Each episode, epsilon is decayed by this factor\n",
" estimator_policy: Policy Function to be optimized \n",
" estimator_value: Value function approximator, used as a baseline\n",
" num_episodes: Number of episodes to run for\n",
" discount_factor: Time-discount factor\n",
" \n",
" Returns:\n",
" An EpisodeStats object with two numpy arrays for episode_lengths and episode_rewards.\n",
@@ -410,7 +409,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
"version": "3.5.0"
}
},
"nbformat": 4,
