From a64e36dd3164c5732adbdfdf89b1fffa99ef8eff Mon Sep 17 00:00:00 2001
From: = <201551009@iiitvadodara.ac.in>
Date: Thu, 22 Jun 2017 15:21:04 +0530
Subject: [PATCH 01/14] - Added Gradient Descent

---
 README.md                            |   1 +
 gradient descent/gradient_descent.py | 119 +++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)
 create mode 100644 gradient descent/gradient_descent.py

diff --git a/README.md b/README.md
index 1dbb1892..9fd6214f 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ Community (college) maintained list of Algorithms and Data Structures implementa
 | [Dijkstra Algorithm](https://en.wikipedia.org/wiki/Dijkstra's_algorithm/) | [:white_check_mark:](dijkstra/dijkstra.c) | | [:white_check_mark:](dijkstra/Dijkstra.java) | [:white_check_mark:](dijkstra/dijkstra.py) | | |
 | [Euclidean GCD](https://en.wikipedia.org/wiki/Euclidean_algorithm) | [:white_check_mark:](euclidean_gcd/euclidean_gcd.c) | | [:white_check_mark:](euclidean_gcd/EuclideanGCD.java) | [:white_check_mark:](euclidean_gcd/euclidean_gcd.py) | | [:white_check_mark:](euclidean_gcd/euclideanGCD.js) |
 | [Exponentiation by Squaring](https://en.wikipedia.org/wiki/Exponentiation_by_squaring) | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.c) | | | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.py) | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.go) | [:white_check_mark:](exponentiation_by_squaring/exponentiationBySquaring.js) |
+| [Gradient Descent](https://en.wikipedia.org/wiki/Gradient_descent) | | | | [:white_check_mark:](heap_sort/heap_sort.py) | | |
 | [Heap Sort](https://en.wikipedia.org/wiki/Heapsort) | [:white_check_mark:](heap_sort/heap_sort.c) | | [:white_check_mark:](heap_sort/HeapSort.java) | [:white_check_mark:](heap_sort/heap_sort.py) | | |
 | [Insertion Sort](https://en.wikipedia.org/wiki/Insertion_sort) | [:white_check_mark:](insertion_sort/insertion_sort.c) | | [:white_check_mark:](insertion_sort/InsertionSort.java)| [:white_check_mark:](insertion_sort/insertion_sort.py) | [:white_check_mark:](insertion_sort/insertion_sort.go) | |
 | [k-NN](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) | | | | [:white_check_mark:](k_nn/k_nn.py) | | |
diff --git a/gradient descent/gradient_descent.py b/gradient descent/gradient_descent.py
new file mode 100644
index 00000000..38f1d1d8
--- /dev/null
+++ b/gradient descent/gradient_descent.py	
@@ -0,0 +1,119 @@
+"""
+Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
+"""
+import numpy
+
+# List of input, output pairs
+train_data = (((5, 2, 3), 15), ((6, 5, 9), 25), ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
+test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
+parameter_vector = [2, 4, 1, 5]
+m = len(train_data)
+LEARNING_RATE = 0.009
+
+
+def _error(example_no, data_set='train'):
+    """
+    :param data_set: train data or test data
+    :param example_no: example number whose error has to be checked
+    :return: error in example pointed by example number.
+    """
+    return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set)
+
+
+def _hypothesis_value(data_input_tuple):
+    """
+    Calculates hypothesis function value for a given input
+    :param data_input_tuple: Input tuple of a particular example
+    :return: Value of hypothesis function at that point.
+    Note that parameter input value is fixed as 1.
+    Also known as 'biased input' inn ML terminology and the parameter associated with it
+    is known as 'biased parameter'.
+    """
+    hyp_val = 0
+    for i in range(len(parameter_vector) - 1):
+        hyp_val = hyp_val + data_input_tuple[i]*parameter_vector[i+1]
+    hyp_val = hyp_val + 1*parameter_vector[0]
+    return hyp_val
+
+
+def output(example_no, data_set):
+    """
+    :param data_set: test data or train data
+    :param example_no: example whose output is to be fetched
+    :return: output for that example
+    """
+    if data_set == 'train':
+        return train_data[example_no][1]
+    elif data_set == 'test':
+        return test_data[example_no][1]
+
+
+def calculate_hypothesis_value(example_no, data_set):
+    """
+    Calculates hypothesis value for a given example
+    :param data_set: test data or train_data
+    :param example_no: example whose hypothesis value is to be calculated
+    :return: hypothesis value for that example
+    """
+    if data_set == "train":
+        return _hypothesis_value(train_data[example_no][0])
+    elif data_set == "test":
+        return _hypothesis_value(test_data[example_no][0])
+
+
+def summation_of_cost_derivative(index, end=m):
+    """
+    Calculates the sum of cost function derivative
+    :param index: index wrt derivative is being calculated
+    :param end: value where summation ends, default is m, number of examples
+    :return: Returns the summation of cost derivative
+    Note: If index is -1, this means we are calculcating summation wrt to biased parameter.
+    """
+    summation_value = 0
+    for i in range(end):
+        if index == -1:
+            summation_value += _error(i)
+        else:
+            summation_value += _error(i)*train_data[i][0][index]
+    return summation_value
+
+
+def get_cost_derivative(index):
+    """
+    :param index: index of the parameter vector wrt to derivative is to be calculated
+    :return: derivative wrt to that index
+    Note: If index is -1, this means we are calculcating summation wrt to biased parameter.
+    """
+    cost_derivative_value = summation_of_cost_derivative(index, m)/m
+    return cost_derivative_value
+
+
+def run_gradient_descent():
+    global parameter_vector
+    # Tune these values to set a tolerance value for predicted output
+    absolute_error_limit = 0.000002
+    relative_error_limit = 0
+    j = 0
+    while True:
+        j = j+1
+        temp_parameter_vector = [0, 0, 0, 0]
+        for i in range(0, len(parameter_vector)):
+            cost_derivative = get_cost_derivative(i-1)
+            temp_parameter_vector[i] = parameter_vector[i] - LEARNING_RATE*cost_derivative
+        if numpy.allclose(parameter_vector, temp_parameter_vector,
+                          atol=absolute_error_limit, rtol=relative_error_limit):
+            break
+        parameter_vector = temp_parameter_vector
+    print("Number of iterations:", j)
+
+
+def test_gradient_descent():
+    for i in range(len(test_data)):
+        print("Actual output value:",  output(i, 'test'))
+        print("Hypothesis output:", calculate_hypothesis_value(i, 'test'))
+
+
+if __name__ == '__main__':
+    run_gradient_descent()
+    print("\nTesting gradient descent for a linear hypothesis function.\n")
+    test_gradient_descent()

From 84c1a6d64cde9471f9cfb270360ec5c4954233d0 Mon Sep 17 00:00:00 2001
From: = <201551009@iiitvadodara.ac.in>
Date: Thu, 22 Jun 2017 15:24:46 +0530
Subject: [PATCH 02/14] - Renamed dir to gradient_descent, fixed README link

---
 README.md                                                  | 2 +-
 {gradient descent => gradient_descent}/gradient_descent.py | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename {gradient descent => gradient_descent}/gradient_descent.py (100%)

diff --git a/README.md b/README.md
index 9fd6214f..b571fe67 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ Community (college) maintained list of Algorithms and Data Structures implementa
 | [Dijkstra Algorithm](https://en.wikipedia.org/wiki/Dijkstra's_algorithm/) | [:white_check_mark:](dijkstra/dijkstra.c) | | [:white_check_mark:](dijkstra/Dijkstra.java) | [:white_check_mark:](dijkstra/dijkstra.py) | | |
 | [Euclidean GCD](https://en.wikipedia.org/wiki/Euclidean_algorithm) | [:white_check_mark:](euclidean_gcd/euclidean_gcd.c) | | [:white_check_mark:](euclidean_gcd/EuclideanGCD.java) | [:white_check_mark:](euclidean_gcd/euclidean_gcd.py) | | [:white_check_mark:](euclidean_gcd/euclideanGCD.js) |
 | [Exponentiation by Squaring](https://en.wikipedia.org/wiki/Exponentiation_by_squaring) | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.c) | | | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.py) | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.go) | [:white_check_mark:](exponentiation_by_squaring/exponentiationBySquaring.js) |
-| [Gradient Descent](https://en.wikipedia.org/wiki/Gradient_descent) | | | | [:white_check_mark:](heap_sort/heap_sort.py) | | |
+| [Gradient Descent](https://en.wikipedia.org/wiki/Gradient_descent) | | | | [:white_check_mark:](gradient_descent/gradient_descent.py) | | |
 | [Heap Sort](https://en.wikipedia.org/wiki/Heapsort) | [:white_check_mark:](heap_sort/heap_sort.c) | | [:white_check_mark:](heap_sort/HeapSort.java) | [:white_check_mark:](heap_sort/heap_sort.py) | | |
 | [Insertion Sort](https://en.wikipedia.org/wiki/Insertion_sort) | [:white_check_mark:](insertion_sort/insertion_sort.c) | | [:white_check_mark:](insertion_sort/InsertionSort.java)| [:white_check_mark:](insertion_sort/insertion_sort.py) | [:white_check_mark:](insertion_sort/insertion_sort.go) | |
 | [k-NN](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) | | | | [:white_check_mark:](k_nn/k_nn.py) | | |
diff --git a/gradient descent/gradient_descent.py b/gradient_descent/gradient_descent.py
similarity index 100%
rename from gradient descent/gradient_descent.py
rename to gradient_descent/gradient_descent.py

From 9ddb8b98c32ae032206791714bb71805e65a1e91 Mon Sep 17 00:00:00 2001
From: = <201551009@iiitvadodara.ac.in>
Date: Thu, 22 Jun 2017 15:43:24 +0530
Subject: [PATCH 03/14] - Changed gradient_descent.py according to PEP8
 guidelines.

---
 gradient_descent/gradient_descent.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 38f1d1d8..14d50da9 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -4,7 +4,8 @@
 import numpy
 
 # List of input, output pairs
-train_data = (((5, 2, 3), 15), ((6, 5, 9), 25), ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
+train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
+              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
 test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
 parameter_vector = [2, 4, 1, 5]
 m = len(train_data)
@@ -99,7 +100,8 @@ def run_gradient_descent():
         temp_parameter_vector = [0, 0, 0, 0]
         for i in range(0, len(parameter_vector)):
             cost_derivative = get_cost_derivative(i-1)
-            temp_parameter_vector[i] = parameter_vector[i] - LEARNING_RATE*cost_derivative
+            temp_parameter_vector[i] = parameter_vector[i] - \
+                LEARNING_RATE*cost_derivative
         if numpy.allclose(parameter_vector, temp_parameter_vector,
                           atol=absolute_error_limit, rtol=relative_error_limit):
             break

From f3cb6918ac815a6ae38c46472f3bed06e2d20e3d Mon Sep 17 00:00:00 2001
From: Prakash Rai <201551009@iiitvadodara.ac.in>
Date: Thu, 22 Jun 2017 17:16:53 +0530
Subject: [PATCH 04/14] Fixed typos in comments

---
 gradient_descent/gradient_descent.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 14d50da9..2254d737 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -27,7 +27,7 @@ def _hypothesis_value(data_input_tuple):
     :param data_input_tuple: Input tuple of a particular example
     :return: Value of hypothesis function at that point.
     Note that parameter input value is fixed as 1.
-    Also known as 'biased input' inn ML terminology and the parameter associated with it
+    Also known as 'biased input' in ML terminology and the parameter associated with it
     is known as 'biased parameter'.
     """
     hyp_val = 0
@@ -68,7 +68,7 @@ def summation_of_cost_derivative(index, end=m):
     :param index: index wrt derivative is being calculated
     :param end: value where summation ends, default is m, number of examples
     :return: Returns the summation of cost derivative
-    Note: If index is -1, this means we are calculcating summation wrt to biased parameter.
+    Note: If index is -1, this means we are calculating summation wrt to biased parameter.
     """
     summation_value = 0
     for i in range(end):
@@ -83,7 +83,7 @@ def get_cost_derivative(index):
     """
     :param index: index of the parameter vector wrt to derivative is to be calculated
     :return: derivative wrt to that index
-    Note: If index is -1, this means we are calculcating summation wrt to biased parameter.
+    Note: If index is -1, this means we are calculating summation wrt to biased parameter.
     """
     cost_derivative_value = summation_of_cost_derivative(index, m)/m
     return cost_derivative_value

From 3f7c2d1cefda68874e0f0b3c88ca4f5a3206717d Mon Sep 17 00:00:00 2001
From: Prakash Rai <201551009@iiitvadodara.ac.in>
Date: Thu, 22 Jun 2017 18:37:48 +0530
Subject: [PATCH 05/14] Use augmented assignment and fix print spacing

---
 gradient_descent/gradient_descent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 2254d737..39dbeecc 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -96,7 +96,7 @@ def run_gradient_descent():
     relative_error_limit = 0
     j = 0
     while True:
-        j = j+1
+        j += 1
         temp_parameter_vector = [0, 0, 0, 0]
         for i in range(0, len(parameter_vector)):
             cost_derivative = get_cost_derivative(i-1)
@@ -111,7 +111,7 @@ def run_gradient_descent():
 
 def test_gradient_descent():
     for i in range(len(test_data)):
-        print("Actual output value:",  output(i, 'test'))
+        print("Actual output value:", output(i, 'test'))
         print("Hypothesis output:", calculate_hypothesis_value(i, 'test'))
 
 

From 29ce52d76fccbb81357d19f44531e7c660eb2605 Mon Sep 17 00:00:00 2001
From: Prakash Rai <201551009@iiitvadodara.ac.in>
Date: Thu, 22 Jun 2017 19:04:13 +0530
Subject: [PATCH 06/14] Reword bias input note, use += in _hypothesis_value

---
 gradient_descent/gradient_descent.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 39dbeecc..ebd29344 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -26,14 +26,14 @@ def _hypothesis_value(data_input_tuple):
     Calculates hypothesis function value for a given input
     :param data_input_tuple: Input tuple of a particular example
     :return: Value of hypothesis function at that point.
-    Note that parameter input value is fixed as 1.
-    Also known as 'biased input' in ML terminology and the parameter associated with it
-    is known as 'biased parameter'.
+    Note that there is an 'biased input' whose value is fixed as 1.
+    It is not explicitly mentioned in input data.. But, ML hypothesis functions use it.
+    So, we have to take care of it separately. Line 36 takes care of it. 
     """
     hyp_val = 0
     for i in range(len(parameter_vector) - 1):
-        hyp_val = hyp_val + data_input_tuple[i]*parameter_vector[i+1]
-    hyp_val = hyp_val + 1*parameter_vector[0]
+        hyp_val += data_input_tuple[i]*parameter_vector[i+1]
+    hyp_val += parameter_vector[0]
     return hyp_val
 
 

From 3556dc3048f387ec8fa042fdf5c9b7cccf949afb Mon Sep 17 00:00:00 2001
From: Prakash Rai <201551009@iiitvadodara.ac.in>
Date: Thu, 22 Jun 2017 21:12:44 +0530
Subject: [PATCH 07/14] Remove trailing whitespace in docstring

---
 gradient_descent/gradient_descent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index ebd29344..1e771b07 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -28,7 +28,7 @@ def _hypothesis_value(data_input_tuple):
     :return: Value of hypothesis function at that point.
     Note that there is an 'biased input' whose value is fixed as 1.
     It is not explicitly mentioned in input data.. But, ML hypothesis functions use it.
-    So, we have to take care of it separately. Line 36 takes care of it. 
+    So, we have to take care of it separately. Line 36 takes care of it.
     """
     hyp_val = 0
     for i in range(len(parameter_vector) - 1):

From 77c58df42339ad3354494b7c6fc527f0dd03367f Mon Sep 17 00:00:00 2001
From: = <201551009@iiitvadodara.ac.in>
Date: Tue, 27 Jun 2017 15:41:24 +0530
Subject: [PATCH 08/14] - Modified gradient_descent.py - Added open source
 datasets

---
 .../linear_regression/rock_aquifer_test.dat   |   6 +
 .../linear_regression/rock_aquifer_train.dat  |  25 +++
 gradient_descent/gradient_descent.py          | 195 ++++++++++--------
 3 files changed, 143 insertions(+), 83 deletions(-)
 create mode 100644 .datasets/linear_regression/rock_aquifer_test.dat
 create mode 100644 .datasets/linear_regression/rock_aquifer_train.dat

diff --git a/.datasets/linear_regression/rock_aquifer_test.dat b/.datasets/linear_regression/rock_aquifer_test.dat
new file mode 100644
index 00000000..e5a030cd
--- /dev/null
+++ b/.datasets/linear_regression/rock_aquifer_test.dat
@@ -0,0 +1,6 @@
+       4  2.7760    0.74 -1.6989    2.82   97.18   11.09   0.991   11.46   0.936    1.64   95.54
+       4  2.7467    0.68 -2.0000   14.09   85.91   11.84   1.289   12.14   1.187    1.79   84.15
+       4  2.7691    1.70 -1.6989    8.60   91.40   10.25   1.169   10.70   1.180    1.89   89.50
+       4  2.7176    2.70 -1.5228   10.05   89.95   11.11   0.669   11.48   0.788    1.97   88.51
+       4  2.6685    2.90 -1.6989   22.08   77.92   11.81   1.152   12.13   1.128    1.74   76.17
+       4  2.8425    1.80 -1.6989   16.39   83.61   12.07   1.361   12.21   1.156    1.53   82.04
\ No newline at end of file
diff --git a/.datasets/linear_regression/rock_aquifer_train.dat b/.datasets/linear_regression/rock_aquifer_train.dat
new file mode 100644
index 00000000..b528f4ec
--- /dev/null
+++ b/.datasets/linear_regression/rock_aquifer_train.dat
@@ -0,0 +1,25 @@
+       1  2.6975    0.83 -1.2218    4.43   95.57   11.88   1.563   12.19   1.379   74.44   21.08
+       1  2.6415    0.59 -1.6989    7.19   92.81   10.31   1.410   10.85   1.361   85.80    7.00
+       1  2.6457    0.67 -1.6989    2.17   97.83   12.51   1.690   12.92   1.759   93.71    4.13
+       1  2.6342    0.50 -1.6989    6.34   93.66   10.76   1.327   11.40   1.361   95.55    2.17
+       1  2.6236    1.10 -1.2218    2.24   97.76    8.62   1.560    8.92   1.427   77.38   20.41
+       2  2.7278    1.10 -1.6989    8.85   91.16    9.85   0.848   10.25   0.815    1.22   89.93
+       2  2.7480    2.80 -1.2218    7.72   92.28    9.60   1.045    9.85   1.052    2.02   90.26
+       2  2.6678    4.60 -1.7213    8.18   91.82    8.75   0.999    9.10   0.879    1.56   75.38
+       2  2.6949    3.50 -1.5228    5.41   94.59    9.93   0.910   10.24   0.848    1.27   93.28
+       2  2.7020    8.60 -1.3010   19.25   80.75   10.00   0.850   10.54   0.774    1.43   79.32
+       2  2.7360    0.81 -1.6989   15.60   84.40   11.41   0.826   11.76   0.840    1.52   82.84
+       2  2.7420    1.40 -2.0000   16.78   83.22   11.49   0.618   11.87   0.619    1.62   81.62
+       2  2.6832    2.70 -1.6989   26.29   73.71   10.79   0.747   11.13   0.688    2.36   71.35
+       3  2.7504    1.20 -1.5229    4.63   95.37   10.43   0.888   10.80   0.962    7.21   88.16
+       3  2.7194    0.91 -1.6989   23.55   76.45   11.56   0.681   11.92   0.675    1.99   74.46
+       3  2.6247    1.40 -1.3979    2.56   97.44   10.50   0.878   10.82   0.806   89.73    7.71
+       3  2.7223    3.00 -1.3010   10.54   89.46   10.74   0.942   11.08   0.711   92.19    7.27
+       3  2.6185    1.30 -1.5228    7.06   92.94   11.41   0.784   11.83   0.830   89.56    3.37
+       3  2.6395    0.18 -2.0458    3.10   96.90   12.12   0.804   12.61   0.918   91.76    5.18
+       3  2.6252    5.40 -1.6989   18.83   81.17   11.59   0.904   11.96   0.805    5.09   76.07
+       3  2.6531    0.30 -2.3010   12.65   87.35   11.51   0.849   11.85   0.749   54.47   32.87
+       3  2.6204    0.56 -1.6989    5.88   94.12   11.32   0.861   11.72   0.811   91.32    2.80
+       4  2.7691    0.93 -2.0000    2.92   97.08   10.57   0.640   10.97   0.587    1.42   95.65
+       4  2.7899    0.23 -1.6989    2.11   97.89   11.55   0.759   11.80   0.741    4.82   93.05
+       4  2.7602    1.50 -1.6989    3.87   96.13   10.35   1.367   10.66   1.211    1.92   94.20
\ No newline at end of file
diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 1e771b07..9ecda4a8 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -2,120 +2,149 @@
 Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
 """
 import numpy
+from os.path import expanduser
+import matplotlib.pyplot as plt
 
-# List of input, output pairs
-train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
-              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
-test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
-parameter_vector = [2, 4, 1, 5]
-m = len(train_data)
-LEARNING_RATE = 0.009
 
-
-def _error(example_no, data_set='train'):
-    """
-    :param data_set: train data or test data
-    :param example_no: example number whose error has to be checked
-    :return: error in example pointed by example number.
-    """
-    return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set)
-
-
-def _hypothesis_value(data_input_tuple):
+def _error(input_data, output_data, parameter_vector):
     """
-    Calculates hypothesis function value for a given input
-    :param data_input_tuple: Input tuple of a particular example
-    :return: Value of hypothesis function at that point.
-    Note that there is an 'biased input' whose value is fixed as 1.
-    It is not explicitly mentioned in input data.. But, ML hypothesis functions use it.
-    So, we have to take care of it separately. Line 36 takes care of it.
+    :param input_data:  Data whose summation of cost derivative has to be calculated
+    :param output_data: Output corresponding to training data
+    :param parameter_vector: Weight vector
+    :return:
     """
-    hyp_val = 0
-    for i in range(len(parameter_vector) - 1):
-        hyp_val += data_input_tuple[i]*parameter_vector[i+1]
-    hyp_val += parameter_vector[0]
-    return hyp_val
+    return _hypothesis_value(input_data, parameter_vector) - output_data
 
 
-def output(example_no, data_set):
+def _hypothesis_value(data, parameter_vector):
     """
-    :param data_set: test data or train data
-    :param example_no: example whose output is to be fetched
-    :return: output for that example
+    Calculates hypothesis function value for a given data
+    :param data: Data whose hypothesis value has to be calculated
+    :param parameter_vector: Weight vector
+    :return: Vector of values of hypothesis function for given data matrix.
     """
-    if data_set == 'train':
-        return train_data[example_no][1]
-    elif data_set == 'test':
-        return test_data[example_no][1]
+    hyp_val_matrix = numpy.asmatrix(numpy.dot(data, parameter_vector))
+    return hyp_val_matrix
 
 
-def calculate_hypothesis_value(example_no, data_set):
-    """
-    Calculates hypothesis value for a given example
-    :param data_set: test data or train_data
-    :param example_no: example whose hypothesis value is to be calculated
-    :return: hypothesis value for that example
-    """
-    if data_set == "train":
-        return _hypothesis_value(train_data[example_no][0])
-    elif data_set == "test":
-        return _hypothesis_value(test_data[example_no][0])
-
-
-def summation_of_cost_derivative(index, end=m):
+def summation_of_cost_derivative(input_data, output_data, parameter_vector):
     """
     Calculates the sum of cost function derivative
-    :param index: index wrt derivative is being calculated
-    :param end: value where summation ends, default is m, number of examples
+    :param input_data:  Data whose summation of cost derivative has to be calculated
+    :param output_data: Output corresponding to training data
+    :param parameter_vector: Weight vector
+
     :return: Returns the summation of cost derivative
-    Note: If index is -1, this means we are calculating summation wrt to biased parameter.
     """
-    summation_value = 0
-    for i in range(end):
-        if index == -1:
-            summation_value += _error(i)
-        else:
-            summation_value += _error(i)*train_data[i][0][index]
+    summation_value = numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))
     return summation_value
 
 
-def get_cost_derivative(index):
+def get_cost_derivative(train_data, train_output, parameter_vector):
     """
-    :param index: index of the parameter vector wrt to derivative is to be calculated
-    :return: derivative wrt to that index
-    Note: If index is -1, this means we are calculating summation wrt to biased parameter.
+
+    :param train_data: Training data
+    :param train_output: Output corresponding to training data
+    :param parameter_vector: Weight vector
+    :return: derivative vector
     """
-    cost_derivative_value = summation_of_cost_derivative(index, m)/m
+    train_data_size = len(train_data)
+    cost_derivative_value = summation_of_cost_derivative(train_data, train_output,
+                                                         parameter_vector)/train_data_size
     return cost_derivative_value
 
 
-def run_gradient_descent():
-    global parameter_vector
-    # Tune these values to set a tolerance value for predicted output
-    absolute_error_limit = 0.000002
-    relative_error_limit = 0
+def run_gradient_descent(train_data, train_output, parameter_vector,
+                         learning_rate, absolute_error_limit,
+                         relative_error_limit):
+    """
+    Runs gradient descent on given training data and optimizes
+    parameters
+    :param train_data: Training data. Type: Matrix.
+    :param train_output: Output corresponding to each training data. Type: Vector,
+    may be matrix
+    :param parameter_vector: Randomly initialized weight vector
+    :param learning_rate: Rate at which gradient descent learns
+    :param absolute_error_limit: Tolerance for error in training.
+    :param relative_error_limit: Tolerance for error in training. It is relative to second parameter.
+    :return: Optimized parameter vector.
+    """
     j = 0
     while True:
         j += 1
-        temp_parameter_vector = [0, 0, 0, 0]
-        for i in range(0, len(parameter_vector)):
-            cost_derivative = get_cost_derivative(i-1)
-            temp_parameter_vector[i] = parameter_vector[i] - \
-                LEARNING_RATE*cost_derivative
+        cost_derivative = get_cost_derivative(train_data, train_output, parameter_vector)
+        temp_parameter_vector = parameter_vector - \
+            learning_rate*cost_derivative
         if numpy.allclose(parameter_vector, temp_parameter_vector,
                           atol=absolute_error_limit, rtol=relative_error_limit):
             break
         parameter_vector = temp_parameter_vector
-    print("Number of iterations:", j)
+    return parameter_vector
 
 
-def test_gradient_descent():
-    for i in range(len(test_data)):
-        print("Actual output value:", output(i, 'test'))
-        print("Hypothesis output:", calculate_hypothesis_value(i, 'test'))
+def test_gradient_descent(test_data, test_output, parameter_vector):
+    """
+    :param test_data: Input data to be tested
+    :param test_output: Actual Output data for Input dataset
+    :param parameter_vector: Weight vector after optimized by using gradient descent
+    :return: None
+    """
+    actual_output = test_output
+    hypothesis_output = _hypothesis_value(test_data,
+                                          parameter_vector=parameter_vector)
+    num_examples = len(test_output)
+    plt.stem(range(num_examples), actual_output, markerfmt='go', label='Actual Output')
+    plt.stem(range(num_examples), hypothesis_output, label='Hypothesis Output')
+    plt.xlabel('Test case')
+    plt.ylabel('Output Values')
+    plt.xlim([-1, 7])
+    plt.legend()
+    plt.show()
+
+
+def download_data():
+    """
+    Downloads test and train data from GitHub repository
+    """
+    import requests
+    home = expanduser('~')
+    response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/'
+                            'linear_regression/rock_aquifer_train.dat')
+    if response:
+        with open(home+'/rock_aquifer_train.dat', 'wb') as f:
+            f.write(response.text)
+    response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/'
+                            'linear_regression/rock_aquifer_test.dat')
+    if response:
+        with open(home + '/rock_aquifer_test.dat', 'wb') as f:
+            f.write(response.text)
+
+
+def main():
+    download_data()
+    home = expanduser('~')
+    input_cols = list(range(11))
+    train_data = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat',
+                                              usecols=input_cols))
+    num_data = len(train_data)
+    biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
+    train_data = numpy.column_stack((biased_tuple, train_data))
+    output_cols = (11,)
+    train_output = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat',
+                                                usecols=output_cols)).transpose()
+    parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose()
+    learning_rate = 0.00015
+    absolute_error_limit = 0.000015
+    relative_error_limit = 0
+    parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector, learning_rate,
+                                            absolute_error_limit, relative_error_limit)
+    test_data = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=input_cols)
+    num_data = len(test_data)
+    biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
+    test_data = numpy.column_stack((biased_tuple, test_data))
+    test_output = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=output_cols)
+    test_gradient_descent(test_data, test_output, parameter_vector=parameter_vector)
 
 
 if __name__ == '__main__':
-    run_gradient_descent()
-    print("\nTesting gradient descent for a linear hypothesis function.\n")
-    test_gradient_descent()
+    main()

From 1e0a530fbe5bad14b228c29968fb344abc351aa7 Mon Sep 17 00:00:00 2001
From: = <201551009@iiitvadodara.ac.in>
Date: Tue, 27 Jun 2017 15:50:24 +0530
Subject: [PATCH 09/14] - Inlined return values, removed iteration counter

---
 gradient_descent/gradient_descent.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 9ecda4a8..0fc8df00 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -23,8 +23,7 @@ def _hypothesis_value(data, parameter_vector):
     :param parameter_vector: Weight vector
     :return: Vector of values of hypothesis function for given data matrix.
     """
-    hyp_val_matrix = numpy.asmatrix(numpy.dot(data, parameter_vector))
-    return hyp_val_matrix
+    return numpy.asmatrix(numpy.dot(data, parameter_vector))
 
 
 def summation_of_cost_derivative(input_data, output_data, parameter_vector):
@@ -36,8 +35,7 @@ def summation_of_cost_derivative(input_data, output_data, parameter_vector):
 
     :return: Returns the summation of cost derivative
     """
-    summation_value = numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))
-    return summation_value
+    return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))
 
 
 def get_cost_derivative(train_data, train_output, parameter_vector):
@@ -49,9 +47,8 @@ def get_cost_derivative(train_data, train_output, parameter_vector):
     :return: derivative vector
     """
     train_data_size = len(train_data)
-    cost_derivative_value = summation_of_cost_derivative(train_data, train_output,
-                                                         parameter_vector)/train_data_size
-    return cost_derivative_value
+    return summation_of_cost_derivative(train_data, train_output,
+                                        parameter_vector)/train_data_size
 
 
 def run_gradient_descent(train_data, train_output, parameter_vector,
@@ -69,9 +66,7 @@ def run_gradient_descent(train_data, train_output, parameter_vector,
     :param relative_error_limit: Tolerance for error in training. It is relative to second parameter.
     :return: Optimized parameter vector.
     """
-    j = 0
     while True:
-        j += 1
         cost_derivative = get_cost_derivative(train_data, train_output, parameter_vector)
         temp_parameter_vector = parameter_vector - \
             learning_rate*cost_derivative
@@ -136,8 +131,9 @@ def main():
     learning_rate = 0.00015
     absolute_error_limit = 0.000015
     relative_error_limit = 0
-    parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector, learning_rate,
-                                            absolute_error_limit, relative_error_limit)
+    parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector,
+                                            learning_rate, absolute_error_limit,
+                                            relative_error_limit)
     test_data = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=input_cols)
     num_data = len(test_data)
     biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())

From 6da9dab384c1d457178d6f3aedcb22e1b81f18fb Mon Sep 17 00:00:00 2001
From: = <201551009@iiitvadodara.ac.in>
Date: Tue, 27 Jun 2017 21:03:37 +0530
Subject: [PATCH 10/14] - Updated gradient_descent.py

---
 .../linear_regression/rock_aquifer_test.dat   |  6 --
 .../linear_regression/rock_aquifer_train.dat  | 25 ------
 gradient_descent/gradient_descent.py          | 89 +++++++------------
 3 files changed, 33 insertions(+), 87 deletions(-)
 delete mode 100644 .datasets/linear_regression/rock_aquifer_test.dat
 delete mode 100644 .datasets/linear_regression/rock_aquifer_train.dat

diff --git a/.datasets/linear_regression/rock_aquifer_test.dat b/.datasets/linear_regression/rock_aquifer_test.dat
deleted file mode 100644
index e5a030cd..00000000
--- a/.datasets/linear_regression/rock_aquifer_test.dat
+++ /dev/null
@@ -1,6 +0,0 @@
-       4  2.7760    0.74 -1.6989    2.82   97.18   11.09   0.991   11.46   0.936    1.64   95.54
-       4  2.7467    0.68 -2.0000   14.09   85.91   11.84   1.289   12.14   1.187    1.79   84.15
-       4  2.7691    1.70 -1.6989    8.60   91.40   10.25   1.169   10.70   1.180    1.89   89.50
-       4  2.7176    2.70 -1.5228   10.05   89.95   11.11   0.669   11.48   0.788    1.97   88.51
-       4  2.6685    2.90 -1.6989   22.08   77.92   11.81   1.152   12.13   1.128    1.74   76.17
-       4  2.8425    1.80 -1.6989   16.39   83.61   12.07   1.361   12.21   1.156    1.53   82.04
\ No newline at end of file
diff --git a/.datasets/linear_regression/rock_aquifer_train.dat b/.datasets/linear_regression/rock_aquifer_train.dat
deleted file mode 100644
index b528f4ec..00000000
--- a/.datasets/linear_regression/rock_aquifer_train.dat
+++ /dev/null
@@ -1,25 +0,0 @@
-       1  2.6975    0.83 -1.2218    4.43   95.57   11.88   1.563   12.19   1.379   74.44   21.08
-       1  2.6415    0.59 -1.6989    7.19   92.81   10.31   1.410   10.85   1.361   85.80    7.00
-       1  2.6457    0.67 -1.6989    2.17   97.83   12.51   1.690   12.92   1.759   93.71    4.13
-       1  2.6342    0.50 -1.6989    6.34   93.66   10.76   1.327   11.40   1.361   95.55    2.17
-       1  2.6236    1.10 -1.2218    2.24   97.76    8.62   1.560    8.92   1.427   77.38   20.41
-       2  2.7278    1.10 -1.6989    8.85   91.16    9.85   0.848   10.25   0.815    1.22   89.93
-       2  2.7480    2.80 -1.2218    7.72   92.28    9.60   1.045    9.85   1.052    2.02   90.26
-       2  2.6678    4.60 -1.7213    8.18   91.82    8.75   0.999    9.10   0.879    1.56   75.38
-       2  2.6949    3.50 -1.5228    5.41   94.59    9.93   0.910   10.24   0.848    1.27   93.28
-       2  2.7020    8.60 -1.3010   19.25   80.75   10.00   0.850   10.54   0.774    1.43   79.32
-       2  2.7360    0.81 -1.6989   15.60   84.40   11.41   0.826   11.76   0.840    1.52   82.84
-       2  2.7420    1.40 -2.0000   16.78   83.22   11.49   0.618   11.87   0.619    1.62   81.62
-       2  2.6832    2.70 -1.6989   26.29   73.71   10.79   0.747   11.13   0.688    2.36   71.35
-       3  2.7504    1.20 -1.5229    4.63   95.37   10.43   0.888   10.80   0.962    7.21   88.16
-       3  2.7194    0.91 -1.6989   23.55   76.45   11.56   0.681   11.92   0.675    1.99   74.46
-       3  2.6247    1.40 -1.3979    2.56   97.44   10.50   0.878   10.82   0.806   89.73    7.71
-       3  2.7223    3.00 -1.3010   10.54   89.46   10.74   0.942   11.08   0.711   92.19    7.27
-       3  2.6185    1.30 -1.5228    7.06   92.94   11.41   0.784   11.83   0.830   89.56    3.37
-       3  2.6395    0.18 -2.0458    3.10   96.90   12.12   0.804   12.61   0.918   91.76    5.18
-       3  2.6252    5.40 -1.6989   18.83   81.17   11.59   0.904   11.96   0.805    5.09   76.07
-       3  2.6531    0.30 -2.3010   12.65   87.35   11.51   0.849   11.85   0.749   54.47   32.87
-       3  2.6204    0.56 -1.6989    5.88   94.12   11.32   0.861   11.72   0.811   91.32    2.80
-       4  2.7691    0.93 -2.0000    2.92   97.08   10.57   0.640   10.97   0.587    1.42   95.65
-       4  2.7899    0.23 -1.6989    2.11   97.89   11.55   0.759   11.80   0.741    4.82   93.05
-       4  2.7602    1.50 -1.6989    3.87   96.13   10.35   1.367   10.66   1.211    1.92   94.20
\ No newline at end of file
diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 0fc8df00..5db90ef5 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -2,7 +2,6 @@
 Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
 """
 import numpy
-from os.path import expanduser
 import matplotlib.pyplot as plt
 
 
@@ -11,7 +10,7 @@ def _error(input_data, output_data, parameter_vector):
     :param input_data:  Data whose summation of cost derivative has to be calculated
     :param output_data: Output corresponding to training data
     :param parameter_vector: Weight vector
-    :return:
+    :return: Error in hypothesis value
     """
     return _hypothesis_value(input_data, parameter_vector) - output_data
 
@@ -26,29 +25,17 @@ def _hypothesis_value(data, parameter_vector):
     return numpy.asmatrix(numpy.dot(data, parameter_vector))
 
 
-def summation_of_cost_derivative(input_data, output_data, parameter_vector):
+def get_cost_derivative(input_data, output_data, parameter_vector):
     """
-    Calculates the sum of cost function derivative
-    :param input_data:  Data whose summation of cost derivative has to be calculated
-    :param output_data: Output corresponding to training data
-    :param parameter_vector: Weight vector
-
-    :return: Returns the summation of cost derivative
-    """
-    return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))
 
-
-def get_cost_derivative(train_data, train_output, parameter_vector):
-    """
-
-    :param train_data: Training data
-    :param train_output: Output corresponding to training data
+    :param input_data: Training or testing data
+    :param output_data: Output corresponding to training data
     :param parameter_vector: Weight vector
     :return: derivative vector
     """
-    train_data_size = len(train_data)
-    return summation_of_cost_derivative(train_data, train_output,
-                                        parameter_vector)/train_data_size
+    train_data_size = len(input_data)
+    return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))\
+        / train_data_size
 
 
 def run_gradient_descent(train_data, train_output, parameter_vector,
@@ -77,22 +64,23 @@ def run_gradient_descent(train_data, train_output, parameter_vector,
     return parameter_vector
 
 
-def test_gradient_descent(test_data, test_output, parameter_vector):
+def test_gradient_descent(input_data, output_data, parameter_vector):
     """
-    :param test_data: Input data to be tested
-    :param test_output: Actual Output data for Input dataset
+    :param input_data: Input data to be tested
+    :param output_data: Actual Output data for Input dataset
     :param parameter_vector: Weight vector after optimized by using gradient descent
     :return: None
     """
-    actual_output = test_output
-    hypothesis_output = _hypothesis_value(test_data,
+    actual_output = output_data
+    hypothesis_output = _hypothesis_value(input_data,
                                           parameter_vector=parameter_vector)
-    num_examples = len(test_output)
-    plt.stem(range(num_examples), actual_output, markerfmt='go', label='Actual Output')
-    plt.stem(range(num_examples), hypothesis_output, label='Hypothesis Output')
-    plt.xlabel('Test case')
+    num_examples = len(output_data)
+    plt.plot(range(num_examples), actual_output, 'r', label='Actual Output')
+    plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output')
+    plt.xlabel('Test example')
     plt.ylabel('Output Values')
-    plt.xlim([-1, 7])
+    plt.xlim([-1, len(input_data) + 2])
+    plt.ylim([-5, 200])
     plt.legend()
     plt.show()
 
@@ -102,44 +90,33 @@ def download_data():
     Downloads test and train data from GitHub repository
     """
     import requests
-    home = expanduser('~')
-    response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/'
-                            'linear_regression/rock_aquifer_train.dat')
-    if response:
-        with open(home+'/rock_aquifer_train.dat', 'wb') as f:
-            f.write(response.text)
-    response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/'
-                            'linear_regression/rock_aquifer_test.dat')
-    if response:
-        with open(home + '/rock_aquifer_test.dat', 'wb') as f:
-            f.write(response.text)
+    response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat')
+    train_data = []
+    train_output = []
+    data_matrix = response.text.split('\n')
+    for data_tuple in data_matrix:
+        data_tuple = data_tuple.split()
+        if data_tuple:
+            train_data.append(data_tuple[:11])
+            train_output.append(data_tuple[-1])
+    return numpy.asmatrix(train_data).astype(dtype='float'), \
+        numpy.asmatrix(train_output).astype(dtype='float')
 
 
 def main():
-    download_data()
-    home = expanduser('~')
-    input_cols = list(range(11))
-    train_data = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat',
-                                              usecols=input_cols))
+    train_data, train_output = download_data()
     num_data = len(train_data)
     biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
     train_data = numpy.column_stack((biased_tuple, train_data))
-    output_cols = (11,)
-    train_output = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat',
-                                                usecols=output_cols)).transpose()
+    train_output = train_output.transpose()
     parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose()
     learning_rate = 0.00015
-    absolute_error_limit = 0.000015
+    absolute_error_limit = 0.000015/4
     relative_error_limit = 0
     parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector,
                                             learning_rate, absolute_error_limit,
                                             relative_error_limit)
-    test_data = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=input_cols)
-    num_data = len(test_data)
-    biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
-    test_data = numpy.column_stack((biased_tuple, test_data))
-    test_output = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=output_cols)
-    test_gradient_descent(test_data, test_output, parameter_vector=parameter_vector)
+    test_gradient_descent(train_data, train_output, parameter_vector=parameter_vector)
 
 
 if __name__ == '__main__':

From c3d615004a4182f65e8ac42a685e7e298ff527c1 Mon Sep 17 00:00:00 2001
From: Prakash Rai <201551009@iiitvadodara.ac.in>
Date: Wed, 28 Jun 2017 00:56:58 +0530
Subject: [PATCH 11/14] Update gradient_descent.py

---
 gradient_descent/gradient_descent.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 5db90ef5..25f89dea 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -2,6 +2,7 @@
 Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
 """
 import numpy
+import requests
 import matplotlib.pyplot as plt
 
 
@@ -27,7 +28,6 @@ def _hypothesis_value(data, parameter_vector):
 
 def get_cost_derivative(input_data, output_data, parameter_vector):
     """
-
     :param input_data: Training or testing data
     :param output_data: Output corresponding to training data
     :param parameter_vector: Weight vector
@@ -89,7 +89,6 @@ def download_data():
     """
     Downloads test and train data from GitHub repository
     """
-    import requests
     response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat')
     train_data = []
     train_output = []
@@ -111,7 +110,7 @@ def main():
     train_output = train_output.transpose()
     parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose()
     learning_rate = 0.00015
-    absolute_error_limit = 0.000015/4
+    absolute_error_limit = 0.000015
     relative_error_limit = 0
     parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector,
                                             learning_rate, absolute_error_limit,

From 3802026cfd8e0bd0827e763d3136191a6cee78df Mon Sep 17 00:00:00 2001
From: = <201551009@iiitvadodara.ac.in>
Date: Wed, 28 Jun 2017 23:05:15 +0530
Subject: [PATCH 12/14] - Graph is shown when sys argument is passed. - Added
 matplotlib in python3 requirements.txt

---
 gradient_descent/gradient_descent.py | 23 ++++++++++++++---------
 pip3-requirements.txt                |  1 +
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 25f89dea..7f9144eb 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -1,9 +1,12 @@
 """
+To view the plot, run as follows:
+python3 gradient_descent.py show-plot
 Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
 """
 import numpy
 import requests
 import matplotlib.pyplot as plt
+import sys
 
 
 def _error(input_data, output_data, parameter_vector):
@@ -74,15 +77,17 @@ def test_gradient_descent(input_data, output_data, parameter_vector):
     actual_output = output_data
     hypothesis_output = _hypothesis_value(input_data,
                                           parameter_vector=parameter_vector)
-    num_examples = len(output_data)
-    plt.plot(range(num_examples), actual_output, 'r', label='Actual Output')
-    plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output')
-    plt.xlabel('Test example')
-    plt.ylabel('Output Values')
-    plt.xlim([-1, len(input_data) + 2])
-    plt.ylim([-5, 200])
-    plt.legend()
-    plt.show()
+    if len(sys.argv) == 2:
+        if sys.argv[1] == 'show-plot':
+            num_examples = len(output_data)
+            plt.plot(range(num_examples), actual_output, 'r', label='Actual Output')
+            plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output')
+            plt.xlabel('Test example')
+            plt.ylabel('Output Values')
+            plt.xlim([-1, len(input_data) + 2])
+            plt.ylim([-5, 200])
+            plt.legend()
+            plt.show()
 
 
 def download_data():
diff --git a/pip3-requirements.txt b/pip3-requirements.txt
index 6a58861d..de48800f 100644
--- a/pip3-requirements.txt
+++ b/pip3-requirements.txt
@@ -1,3 +1,4 @@
 coala-bears
 numpy
 requests
+matplotlib

From 7a0b0c958eb26c34f7c0f98cb2418196e9987add Mon Sep 17 00:00:00 2001
From: Prakash Rai <201551009@iiitvadodara.ac.in>
Date: Wed, 28 Jun 2017 23:17:06 +0530
Subject: [PATCH 13/14] Update gradient_descent.py

---
 gradient_descent/gradient_descent.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py
index 7f9144eb..85dc7143 100644
--- a/gradient_descent/gradient_descent.py
+++ b/gradient_descent/gradient_descent.py
@@ -57,7 +57,8 @@ def run_gradient_descent(train_data, train_output, parameter_vector,
     :return: Optimized parameter vector.
     """
     while True:
-        cost_derivative = get_cost_derivative(train_data, train_output, parameter_vector)
+        cost_derivative = get_cost_derivative(
+            train_data, train_output, parameter_vector)
         temp_parameter_vector = parameter_vector - \
             learning_rate*cost_derivative
         if numpy.allclose(parameter_vector, temp_parameter_vector,
@@ -80,8 +81,10 @@ def test_gradient_descent(input_data, output_data, parameter_vector):
     if len(sys.argv) == 2:
         if sys.argv[1] == 'show-plot':
             num_examples = len(output_data)
-            plt.plot(range(num_examples), actual_output, 'r', label='Actual Output')
-            plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output')
+            plt.plot(range(num_examples), actual_output,
+                     'r', label='Actual Output')
+            plt.plot(range(num_examples), hypothesis_output,
+                     'g', label='Hypothesis Output')
             plt.xlabel('Test example')
             plt.ylabel('Output Values')
             plt.xlim([-1, len(input_data) + 2])
@@ -94,7 +97,8 @@ def download_data():
     """
     Downloads test and train data from GitHub repository
     """
-    response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat')
+    response = requests.get(
+        'http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat')
     train_data = []
     train_output = []
     data_matrix = response.text.split('\n')
@@ -110,17 +114,20 @@ def download_data():
 def main():
     train_data, train_output = download_data()
     num_data = len(train_data)
-    biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
+    biased_tuple = numpy.asmatrix(numpy.ones(
+        (1, num_data), dtype=float).transpose())
     train_data = numpy.column_stack((biased_tuple, train_data))
     train_output = train_output.transpose()
-    parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose()
+    parameter_vector = numpy.asmatrix(
+        [2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose()
     learning_rate = 0.00015
     absolute_error_limit = 0.000015
     relative_error_limit = 0
     parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector,
                                             learning_rate, absolute_error_limit,
                                             relative_error_limit)
-    test_gradient_descent(train_data, train_output, parameter_vector=parameter_vector)
+    test_gradient_descent(train_data, train_output,
+                          parameter_vector=parameter_vector)
 
 
 if __name__ == '__main__':

From 5eea0f978489c93540b3a510952f71a74e8ef854 Mon Sep 17 00:00:00 2001
From: Prakash Rai <201551009@iiitvadodara.ac.in>
Date: Wed, 28 Jun 2017 23:33:34 +0530
Subject: [PATCH 14/14] Update pip2-requirements.txt

---
 pip2-requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pip2-requirements.txt b/pip2-requirements.txt
index 945b4703..3dc5e338 100644
--- a/pip2-requirements.txt
+++ b/pip2-requirements.txt
@@ -1,2 +1,3 @@
 numpy
 requests
+matplotlib