From a64e36dd3164c5732adbdfdf89b1fffa99ef8eff Mon Sep 17 00:00:00 2001 From: = <201551009@iiitvadodara.ac.in> Date: Thu, 22 Jun 2017 15:21:04 +0530 Subject: [PATCH 01/14] - Added Gradient Descent --- README.md | 1 + gradient descent/gradient_descent.py | 119 +++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 gradient descent/gradient_descent.py diff --git a/README.md b/README.md index 1dbb1892..9fd6214f 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Community (college) maintained list of Algorithms and Data Structures implementa | [Dijkstra Algorithm](https://en.wikipedia.org/wiki/Dijkstra's_algorithm/) | [:white_check_mark:](dijkstra/dijkstra.c) | | [:white_check_mark:](dijkstra/Dijkstra.java) | [:white_check_mark:](dijkstra/dijkstra.py) | | | | [Euclidean GCD](https://en.wikipedia.org/wiki/Euclidean_algorithm) | [:white_check_mark:](euclidean_gcd/euclidean_gcd.c) | | [:white_check_mark:](euclidean_gcd/EuclideanGCD.java) | [:white_check_mark:](euclidean_gcd/euclidean_gcd.py) | | [:white_check_mark:](euclidean_gcd/euclideanGCD.js) | | [Exponentiation by Squaring](https://en.wikipedia.org/wiki/Exponentiation_by_squaring) | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.c) | | | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.py) | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.go) | [:white_check_mark:](exponentiation_by_squaring/exponentiationBySquaring.js) | +| [Gradient Descent](https://en.wikipedia.org/wiki/Gradient_descent) | | | | [:white_check_mark:](heap_sort/heap_sort.py) | | | | [Heap Sort](https://en.wikipedia.org/wiki/Heapsort) | [:white_check_mark:](heap_sort/heap_sort.c) | | [:white_check_mark:](heap_sort/HeapSort.java) | [:white_check_mark:](heap_sort/heap_sort.py) | | | | [Insertion Sort](https://en.wikipedia.org/wiki/Insertion_sort) | [:white_check_mark:](insertion_sort/insertion_sort.c) | | [:white_check_mark:](insertion_sort/InsertionSort.java)| [:white_check_mark:](insertion_sort/insertion_sort.py) | [:white_check_mark:](insertion_sort/insertion_sort.go) | | | [k-NN](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) | | | | [:white_check_mark:](k_nn/k_nn.py) | | | diff --git a/gradient descent/gradient_descent.py b/gradient descent/gradient_descent.py new file mode 100644 index 00000000..38f1d1d8 --- /dev/null +++ b/gradient descent/gradient_descent.py @@ -0,0 +1,119 @@ +""" +Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function. +""" +import numpy + +# List of input, output pairs +train_data = (((5, 2, 3), 15), ((6, 5, 9), 25), ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41)) +test_data = (((515, 22, 13), 555), ((61, 35, 49), 150)) +parameter_vector = [2, 4, 1, 5] +m = len(train_data) +LEARNING_RATE = 0.009 + + +def _error(example_no, data_set='train'): + """ + :param data_set: train data or test data + :param example_no: example number whose error has to be checked + :return: error in example pointed by example number. + """ + return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set) + + +def _hypothesis_value(data_input_tuple): + """ + Calculates hypothesis function value for a given input + :param data_input_tuple: Input tuple of a particular example + :return: Value of hypothesis function at that point. + Note that parameter input value is fixed as 1. + Also known as 'biased input' inn ML terminology and the parameter associated with it + is known as 'biased parameter'. + """ + hyp_val = 0 + for i in range(len(parameter_vector) - 1): + hyp_val = hyp_val + data_input_tuple[i]*parameter_vector[i+1] + hyp_val = hyp_val + 1*parameter_vector[0] + return hyp_val + + +def output(example_no, data_set): + """ + :param data_set: test data or train data + :param example_no: example whose output is to be fetched + :return: output for that example + """ + if data_set == 'train': + return train_data[example_no][1] + elif data_set == 'test': + return test_data[example_no][1] + + +def calculate_hypothesis_value(example_no, data_set): + """ + Calculates hypothesis value for a given example + :param data_set: test data or train_data + :param example_no: example whose hypothesis value is to be calculated + :return: hypothesis value for that example + """ + if data_set == "train": + return _hypothesis_value(train_data[example_no][0]) + elif data_set == "test": + return _hypothesis_value(test_data[example_no][0]) + + +def summation_of_cost_derivative(index, end=m): + """ + Calculates the sum of cost function derivative + :param index: index wrt derivative is being calculated + :param end: value where summation ends, default is m, number of examples + :return: Returns the summation of cost derivative + Note: If index is -1, this means we are calculcating summation wrt to biased parameter. + """ + summation_value = 0 + for i in range(end): + if index == -1: + summation_value += _error(i) + else: + summation_value += _error(i)*train_data[i][0][index] + return summation_value + + +def get_cost_derivative(index): + """ + :param index: index of the parameter vector wrt to derivative is to be calculated + :return: derivative wrt to that index + Note: If index is -1, this means we are calculcating summation wrt to biased parameter. + """ + cost_derivative_value = summation_of_cost_derivative(index, m)/m + return cost_derivative_value + + +def run_gradient_descent(): + global parameter_vector + # Tune these values to set a tolerance value for predicted output + absolute_error_limit = 0.000002 + relative_error_limit = 0 + j = 0 + while True: + j = j+1 + temp_parameter_vector = [0, 0, 0, 0] + for i in range(0, len(parameter_vector)): + cost_derivative = get_cost_derivative(i-1) + temp_parameter_vector[i] = parameter_vector[i] - LEARNING_RATE*cost_derivative + if numpy.allclose(parameter_vector, temp_parameter_vector, + atol=absolute_error_limit, rtol=relative_error_limit): + break + parameter_vector = temp_parameter_vector + print("Number of iterations:", j) + + +def test_gradient_descent(): + for i in range(len(test_data)): + print("Actual output value:", output(i, 'test')) + print("Hypothesis output:", calculate_hypothesis_value(i, 'test')) + + +if __name__ == '__main__': + run_gradient_descent() + print("\nTesting gradient descent for a linear hypothesis function.\n") + test_gradient_descent() From 84c1a6d64cde9471f9cfb270360ec5c4954233d0 Mon Sep 17 00:00:00 2001 From: = <201551009@iiitvadodara.ac.in> Date: Thu, 22 Jun 2017 15:24:46 +0530 Subject: [PATCH 02/14] - Fixed typo --- README.md | 2 +- {gradient descent => gradient_descent}/gradient_descent.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {gradient descent => gradient_descent}/gradient_descent.py (100%) diff --git a/README.md b/README.md index 9fd6214f..b571fe67 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Community (college) maintained list of Algorithms and Data Structures implementa | [Dijkstra Algorithm](https://en.wikipedia.org/wiki/Dijkstra's_algorithm/) | [:white_check_mark:](dijkstra/dijkstra.c) | | [:white_check_mark:](dijkstra/Dijkstra.java) | [:white_check_mark:](dijkstra/dijkstra.py) | | | | [Euclidean GCD](https://en.wikipedia.org/wiki/Euclidean_algorithm) | [:white_check_mark:](euclidean_gcd/euclidean_gcd.c) | | [:white_check_mark:](euclidean_gcd/EuclideanGCD.java) | [:white_check_mark:](euclidean_gcd/euclidean_gcd.py) | | [:white_check_mark:](euclidean_gcd/euclideanGCD.js) | | [Exponentiation by Squaring](https://en.wikipedia.org/wiki/Exponentiation_by_squaring) | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.c) | | | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.py) | [:white_check_mark:](exponentiation_by_squaring/exponentiation_by_squaring.go) | [:white_check_mark:](exponentiation_by_squaring/exponentiationBySquaring.js) | -| [Gradient Descent](https://en.wikipedia.org/wiki/Gradient_descent) | | | | [:white_check_mark:](heap_sort/heap_sort.py) | | | +| [Gradient Descent](https://en.wikipedia.org/wiki/Gradient_descent) | | | | [:white_check_mark:](gradient_descent/gradient_descent.py) | | | | [Heap Sort](https://en.wikipedia.org/wiki/Heapsort) | [:white_check_mark:](heap_sort/heap_sort.c) | | [:white_check_mark:](heap_sort/HeapSort.java) | [:white_check_mark:](heap_sort/heap_sort.py) | | | | [Insertion Sort](https://en.wikipedia.org/wiki/Insertion_sort) | [:white_check_mark:](insertion_sort/insertion_sort.c) | | [:white_check_mark:](insertion_sort/InsertionSort.java)| [:white_check_mark:](insertion_sort/insertion_sort.py) | [:white_check_mark:](insertion_sort/insertion_sort.go) | | | [k-NN](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm) | | | | [:white_check_mark:](k_nn/k_nn.py) | | | diff --git a/gradient descent/gradient_descent.py b/gradient_descent/gradient_descent.py similarity index 100% rename from gradient descent/gradient_descent.py rename to gradient_descent/gradient_descent.py From 9ddb8b98c32ae032206791714bb71805e65a1e91 Mon Sep 17 00:00:00 2001 From: = <201551009@iiitvadodara.ac.in> Date: Thu, 22 Jun 2017 15:43:24 +0530 Subject: [PATCH 03/14] - Changed gradient_descent.py according to PEP8 guidelines. --- gradient_descent/gradient_descent.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 38f1d1d8..14d50da9 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -4,7 +4,8 @@ import numpy # List of input, output pairs -train_data = (((5, 2, 3), 15), ((6, 5, 9), 25), ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41)) +train_data = (((5, 2, 3), 15), ((6, 5, 9), 25), + ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41)) test_data = (((515, 22, 13), 555), ((61, 35, 49), 150)) parameter_vector = [2, 4, 1, 5] m = len(train_data) @@ -99,7 +100,8 @@ def run_gradient_descent(): temp_parameter_vector = [0, 0, 0, 0] for i in range(0, len(parameter_vector)): cost_derivative = get_cost_derivative(i-1) - temp_parameter_vector[i] = parameter_vector[i] - LEARNING_RATE*cost_derivative + temp_parameter_vector[i] = parameter_vector[i] - \ + LEARNING_RATE*cost_derivative if numpy.allclose(parameter_vector, temp_parameter_vector, atol=absolute_error_limit, rtol=relative_error_limit): break From f3cb6918ac815a6ae38c46472f3bed06e2d20e3d Mon Sep 17 00:00:00 2001 From: Prakash Rai <201551009@iiitvadodara.ac.in> Date: Thu, 22 Jun 2017 17:16:53 +0530 Subject: [PATCH 04/14] Fixed typos in comments --- gradient_descent/gradient_descent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 14d50da9..2254d737 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -27,7 +27,7 @@ def _hypothesis_value(data_input_tuple): :param data_input_tuple: Input tuple of a particular example :return: Value of hypothesis function at that point. Note that parameter input value is fixed as 1. - Also known as 'biased input' inn ML terminology and the parameter associated with it + Also known as 'biased input' in ML terminology and the parameter associated with it is known as 'biased parameter'. """ hyp_val = 0 @@ -68,7 +68,7 @@ def summation_of_cost_derivative(index, end=m): :param index: index wrt derivative is being calculated :param end: value where summation ends, default is m, number of examples :return: Returns the summation of cost derivative - Note: If index is -1, this means we are calculcating summation wrt to biased parameter. + Note: If index is -1, this means we are calculating summation wrt to biased parameter. """ summation_value = 0 for i in range(end): @@ -83,7 +83,7 @@ def get_cost_derivative(index): """ :param index: index of the parameter vector wrt to derivative is to be calculated :return: derivative wrt to that index - Note: If index is -1, this means we are calculcating summation wrt to biased parameter. + Note: If index is -1, this means we are calculating summation wrt to biased parameter. """ cost_derivative_value = summation_of_cost_derivative(index, m)/m return cost_derivative_value From 3f7c2d1cefda68874e0f0b3c88ca4f5a3206717d Mon Sep 17 00:00:00 2001 From: Prakash Rai <201551009@iiitvadodara.ac.in> Date: Thu, 22 Jun 2017 18:37:48 +0530 Subject: [PATCH 05/14] Update gradient_descent.py --- gradient_descent/gradient_descent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 2254d737..39dbeecc 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -96,7 +96,7 @@ def run_gradient_descent(): relative_error_limit = 0 j = 0 while True: - j = j+1 + j += 1 temp_parameter_vector = [0, 0, 0, 0] for i in range(0, len(parameter_vector)): cost_derivative = get_cost_derivative(i-1) @@ -111,7 +111,7 @@ def run_gradient_descent(): def test_gradient_descent(): for i in range(len(test_data)): - print("Actual output value:", output(i, 'test')) + print("Actual output value:", output(i, 'test')) print("Hypothesis output:", calculate_hypothesis_value(i, 'test')) From 29ce52d76fccbb81357d19f44531e7c660eb2605 Mon Sep 17 00:00:00 2001 From: Prakash Rai <201551009@iiitvadodara.ac.in> Date: Thu, 22 Jun 2017 19:04:13 +0530 Subject: [PATCH 06/14] Update gradient_descent.py --- gradient_descent/gradient_descent.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 39dbeecc..ebd29344 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -26,14 +26,14 @@ def _hypothesis_value(data_input_tuple): Calculates hypothesis function value for a given input :param data_input_tuple: Input tuple of a particular example :return: Value of hypothesis function at that point. - Note that parameter input value is fixed as 1. - Also known as 'biased input' in ML terminology and the parameter associated with it - is known as 'biased parameter'. + Note that there is an 'biased input' whose value is fixed as 1. + It is not explicitly mentioned in input data.. But, ML hypothesis functions use it. + So, we have to take care of it separately. Line 36 takes care of it. """ hyp_val = 0 for i in range(len(parameter_vector) - 1): - hyp_val = hyp_val + data_input_tuple[i]*parameter_vector[i+1] - hyp_val = hyp_val + 1*parameter_vector[0] + hyp_val += data_input_tuple[i]*parameter_vector[i+1] + hyp_val += parameter_vector[0] return hyp_val From 3556dc3048f387ec8fa042fdf5c9b7cccf949afb Mon Sep 17 00:00:00 2001 From: Prakash Rai <201551009@iiitvadodara.ac.in> Date: Thu, 22 Jun 2017 21:12:44 +0530 Subject: [PATCH 07/14] Update gradient_descent.py --- gradient_descent/gradient_descent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index ebd29344..1e771b07 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -28,7 +28,7 @@ def _hypothesis_value(data_input_tuple): :return: Value of hypothesis function at that point. Note that there is an 'biased input' whose value is fixed as 1. It is not explicitly mentioned in input data.. But, ML hypothesis functions use it. - So, we have to take care of it separately. Line 36 takes care of it. + So, we have to take care of it separately. Line 36 takes care of it. """ hyp_val = 0 for i in range(len(parameter_vector) - 1): From 77c58df42339ad3354494b7c6fc527f0dd03367f Mon Sep 17 00:00:00 2001 From: = <201551009@iiitvadodara.ac.in> Date: Tue, 27 Jun 2017 15:41:24 +0530 Subject: [PATCH 08/14] - Modified gradient_descent.py - Added open source datasets --- .../linear_regression/rock_aquifer_test.dat | 6 + .../linear_regression/rock_aquifer_train.dat | 25 +++ gradient_descent/gradient_descent.py | 195 ++++++++++-------- 3 files changed, 143 insertions(+), 83 deletions(-) create mode 100644 .datasets/linear_regression/rock_aquifer_test.dat create mode 100644 .datasets/linear_regression/rock_aquifer_train.dat diff --git a/.datasets/linear_regression/rock_aquifer_test.dat b/.datasets/linear_regression/rock_aquifer_test.dat new file mode 100644 index 00000000..e5a030cd --- /dev/null +++ b/.datasets/linear_regression/rock_aquifer_test.dat @@ -0,0 +1,6 @@ + 4 2.7760 0.74 -1.6989 2.82 97.18 11.09 0.991 11.46 0.936 1.64 95.54 + 4 2.7467 0.68 -2.0000 14.09 85.91 11.84 1.289 12.14 1.187 1.79 84.15 + 4 2.7691 1.70 -1.6989 8.60 91.40 10.25 1.169 10.70 1.180 1.89 89.50 + 4 2.7176 2.70 -1.5228 10.05 89.95 11.11 0.669 11.48 0.788 1.97 88.51 + 4 2.6685 2.90 -1.6989 22.08 77.92 11.81 1.152 12.13 1.128 1.74 76.17 + 4 2.8425 1.80 -1.6989 16.39 83.61 12.07 1.361 12.21 1.156 1.53 82.04 \ No newline at end of file diff --git a/.datasets/linear_regression/rock_aquifer_train.dat b/.datasets/linear_regression/rock_aquifer_train.dat new file mode 100644 index 00000000..b528f4ec --- /dev/null +++ b/.datasets/linear_regression/rock_aquifer_train.dat @@ -0,0 +1,25 @@ + 1 2.6975 0.83 -1.2218 4.43 95.57 11.88 1.563 12.19 1.379 74.44 21.08 + 1 2.6415 0.59 -1.6989 7.19 92.81 10.31 1.410 10.85 1.361 85.80 7.00 + 1 2.6457 0.67 -1.6989 2.17 97.83 12.51 1.690 12.92 1.759 93.71 4.13 + 1 2.6342 0.50 -1.6989 6.34 93.66 10.76 1.327 11.40 1.361 95.55 2.17 + 1 2.6236 1.10 -1.2218 2.24 97.76 8.62 1.560 8.92 1.427 77.38 20.41 + 2 2.7278 1.10 -1.6989 8.85 91.16 9.85 0.848 10.25 0.815 1.22 89.93 + 2 2.7480 2.80 -1.2218 7.72 92.28 9.60 1.045 9.85 1.052 2.02 90.26 + 2 2.6678 4.60 -1.7213 8.18 91.82 8.75 0.999 9.10 0.879 1.56 75.38 + 2 2.6949 3.50 -1.5228 5.41 94.59 9.93 0.910 10.24 0.848 1.27 93.28 + 2 2.7020 8.60 -1.3010 19.25 80.75 10.00 0.850 10.54 0.774 1.43 79.32 + 2 2.7360 0.81 -1.6989 15.60 84.40 11.41 0.826 11.76 0.840 1.52 82.84 + 2 2.7420 1.40 -2.0000 16.78 83.22 11.49 0.618 11.87 0.619 1.62 81.62 + 2 2.6832 2.70 -1.6989 26.29 73.71 10.79 0.747 11.13 0.688 2.36 71.35 + 3 2.7504 1.20 -1.5229 4.63 95.37 10.43 0.888 10.80 0.962 7.21 88.16 + 3 2.7194 0.91 -1.6989 23.55 76.45 11.56 0.681 11.92 0.675 1.99 74.46 + 3 2.6247 1.40 -1.3979 2.56 97.44 10.50 0.878 10.82 0.806 89.73 7.71 + 3 2.7223 3.00 -1.3010 10.54 89.46 10.74 0.942 11.08 0.711 92.19 7.27 + 3 2.6185 1.30 -1.5228 7.06 92.94 11.41 0.784 11.83 0.830 89.56 3.37 + 3 2.6395 0.18 -2.0458 3.10 96.90 12.12 0.804 12.61 0.918 91.76 5.18 + 3 2.6252 5.40 -1.6989 18.83 81.17 11.59 0.904 11.96 0.805 5.09 76.07 + 3 2.6531 0.30 -2.3010 12.65 87.35 11.51 0.849 11.85 0.749 54.47 32.87 + 3 2.6204 0.56 -1.6989 5.88 94.12 11.32 0.861 11.72 0.811 91.32 2.80 + 4 2.7691 0.93 -2.0000 2.92 97.08 10.57 0.640 10.97 0.587 1.42 95.65 + 4 2.7899 0.23 -1.6989 2.11 97.89 11.55 0.759 11.80 0.741 4.82 93.05 + 4 2.7602 1.50 -1.6989 3.87 96.13 10.35 1.367 10.66 1.211 1.92 94.20 \ No newline at end of file diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 1e771b07..9ecda4a8 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -2,120 +2,149 @@ Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function. """ import numpy +from os.path import expanduser +import matplotlib.pyplot as plt -# List of input, output pairs -train_data = (((5, 2, 3), 15), ((6, 5, 9), 25), - ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41)) -test_data = (((515, 22, 13), 555), ((61, 35, 49), 150)) -parameter_vector = [2, 4, 1, 5] -m = len(train_data) -LEARNING_RATE = 0.009 - -def _error(example_no, data_set='train'): - """ - :param data_set: train data or test data - :param example_no: example number whose error has to be checked - :return: error in example pointed by example number. - """ - return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set) - - -def _hypothesis_value(data_input_tuple): +def _error(input_data, output_data, parameter_vector): """ - Calculates hypothesis function value for a given input - :param data_input_tuple: Input tuple of a particular example - :return: Value of hypothesis function at that point. - Note that there is an 'biased input' whose value is fixed as 1. - It is not explicitly mentioned in input data.. But, ML hypothesis functions use it. - So, we have to take care of it separately. Line 36 takes care of it. + :param input_data: Data whose summation of cost derivative has to be calculated + :param output_data: Output corresponding to training data + :param parameter_vector: Weight vector + :return: """ - hyp_val = 0 - for i in range(len(parameter_vector) - 1): - hyp_val += data_input_tuple[i]*parameter_vector[i+1] - hyp_val += parameter_vector[0] - return hyp_val + return _hypothesis_value(input_data, parameter_vector) - output_data -def output(example_no, data_set): +def _hypothesis_value(data, parameter_vector): """ - :param data_set: test data or train data - :param example_no: example whose output is to be fetched - :return: output for that example + Calculates hypothesis function value for a given data + :param data: Data whose hypothesis value has to be calculated + :param parameter_vector: Weight vector + :return: Vector of values of hypothesis function for given data matrix. """ - if data_set == 'train': - return train_data[example_no][1] - elif data_set == 'test': - return test_data[example_no][1] + hyp_val_matrix = numpy.asmatrix(numpy.dot(data, parameter_vector)) + return hyp_val_matrix -def calculate_hypothesis_value(example_no, data_set): - """ - Calculates hypothesis value for a given example - :param data_set: test data or train_data - :param example_no: example whose hypothesis value is to be calculated - :return: hypothesis value for that example - """ - if data_set == "train": - return _hypothesis_value(train_data[example_no][0]) - elif data_set == "test": - return _hypothesis_value(test_data[example_no][0]) - - -def summation_of_cost_derivative(index, end=m): +def summation_of_cost_derivative(input_data, output_data, parameter_vector): """ Calculates the sum of cost function derivative - :param index: index wrt derivative is being calculated - :param end: value where summation ends, default is m, number of examples + :param input_data: Data whose summation of cost derivative has to be calculated + :param output_data: Output corresponding to training data + :param parameter_vector: Weight vector + :return: Returns the summation of cost derivative - Note: If index is -1, this means we are calculating summation wrt to biased parameter. """ - summation_value = 0 - for i in range(end): - if index == -1: - summation_value += _error(i) - else: - summation_value += _error(i)*train_data[i][0][index] + summation_value = numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector)) return summation_value -def get_cost_derivative(index): +def get_cost_derivative(train_data, train_output, parameter_vector): """ - :param index: index of the parameter vector wrt to derivative is to be calculated - :return: derivative wrt to that index - Note: If index is -1, this means we are calculating summation wrt to biased parameter. + + :param train_data: Training data + :param train_output: Output corresponding to training data + :param parameter_vector: Weight vector + :return: derivative vector """ - cost_derivative_value = summation_of_cost_derivative(index, m)/m + train_data_size = len(train_data) + cost_derivative_value = summation_of_cost_derivative(train_data, train_output, + parameter_vector)/train_data_size return cost_derivative_value -def run_gradient_descent(): - global parameter_vector - # Tune these values to set a tolerance value for predicted output - absolute_error_limit = 0.000002 - relative_error_limit = 0 +def run_gradient_descent(train_data, train_output, parameter_vector, + learning_rate, absolute_error_limit, + relative_error_limit): + """ + Runs gradient descent on given training data and optimizes + parameters + :param train_data: Training data. Type: Matrix. + :param train_output: Output corresponding to each training data. Type: Vector, + may be matrix + :param parameter_vector: Randomly initialized weight vector + :param learning_rate: Rate at which gradient descent learns + :param absolute_error_limit: Tolerance for error in training. + :param relative_error_limit: Tolerance for error in training. It is relative to second parameter. + :return: Optimized parameter vector. + """ j = 0 while True: j += 1 - temp_parameter_vector = [0, 0, 0, 0] - for i in range(0, len(parameter_vector)): - cost_derivative = get_cost_derivative(i-1) - temp_parameter_vector[i] = parameter_vector[i] - \ - LEARNING_RATE*cost_derivative + cost_derivative = get_cost_derivative(train_data, train_output, parameter_vector) + temp_parameter_vector = parameter_vector - \ + learning_rate*cost_derivative if numpy.allclose(parameter_vector, temp_parameter_vector, atol=absolute_error_limit, rtol=relative_error_limit): break parameter_vector = temp_parameter_vector - print("Number of iterations:", j) + return parameter_vector -def test_gradient_descent(): - for i in range(len(test_data)): - print("Actual output value:", output(i, 'test')) - print("Hypothesis output:", calculate_hypothesis_value(i, 'test')) +def test_gradient_descent(test_data, test_output, parameter_vector): + """ + :param test_data: Input data to be tested + :param test_output: Actual Output data for Input dataset + :param parameter_vector: Weight vector after optimized by using gradient descent + :return: None + """ + actual_output = test_output + hypothesis_output = _hypothesis_value(test_data, + parameter_vector=parameter_vector) + num_examples = len(test_output) + plt.stem(range(num_examples), actual_output, markerfmt='go', label='Actual Output') + plt.stem(range(num_examples), hypothesis_output, label='Hypothesis Output') + plt.xlabel('Test case') + plt.ylabel('Output Values') + plt.xlim([-1, 7]) + plt.legend() + plt.show() + + +def download_data(): + """ + Downloads test and train data from GitHub repository + """ + import requests + home = expanduser('~') + response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/' + 'linear_regression/rock_aquifer_train.dat') + if response: + with open(home+'/rock_aquifer_train.dat', 'wb') as f: + f.write(response.text) + response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/' + 'linear_regression/rock_aquifer_test.dat') + if response: + with open(home + '/rock_aquifer_test.dat', 'wb') as f: + f.write(response.text) + + +def main(): + download_data() + home = expanduser('~') + input_cols = list(range(11)) + train_data = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat', + usecols=input_cols)) + num_data = len(train_data) + biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose()) + train_data = numpy.column_stack((biased_tuple, train_data)) + output_cols = (11,) + train_output = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat', + usecols=output_cols)).transpose() + parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose() + learning_rate = 0.00015 + absolute_error_limit = 0.000015 + relative_error_limit = 0 + parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector, learning_rate, + absolute_error_limit, relative_error_limit) + test_data = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=input_cols) + num_data = len(test_data) + biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose()) + test_data = numpy.column_stack((biased_tuple, test_data)) + test_output = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=output_cols) + test_gradient_descent(test_data, test_output, parameter_vector=parameter_vector) if __name__ == '__main__': - run_gradient_descent() - print("\nTesting gradient descent for a linear hypothesis function.\n") - test_gradient_descent() + main() From 1e0a530fbe5bad14b228c29968fb344abc351aa7 Mon Sep 17 00:00:00 2001 From: = <201551009@iiitvadodara.ac.in> Date: Tue, 27 Jun 2017 15:50:24 +0530 Subject: [PATCH 09/14] - Updated gradient_descent.py --- gradient_descent/gradient_descent.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 9ecda4a8..0fc8df00 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -23,8 +23,7 @@ def _hypothesis_value(data, parameter_vector): :param parameter_vector: Weight vector :return: Vector of values of hypothesis function for given data matrix. """ - hyp_val_matrix = numpy.asmatrix(numpy.dot(data, parameter_vector)) - return hyp_val_matrix + return numpy.asmatrix(numpy.dot(data, parameter_vector)) def summation_of_cost_derivative(input_data, output_data, parameter_vector): @@ -36,8 +35,7 @@ def summation_of_cost_derivative(input_data, output_data, parameter_vector): :return: Returns the summation of cost derivative """ - summation_value = numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector)) - return summation_value + return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector)) def get_cost_derivative(train_data, train_output, parameter_vector): @@ -49,9 +47,8 @@ def get_cost_derivative(train_data, train_output, parameter_vector): :return: derivative vector """ train_data_size = len(train_data) - cost_derivative_value = summation_of_cost_derivative(train_data, train_output, - parameter_vector)/train_data_size - return cost_derivative_value + return summation_of_cost_derivative(train_data, train_output, + parameter_vector)/train_data_size def run_gradient_descent(train_data, train_output, parameter_vector, @@ -69,9 +66,7 @@ def run_gradient_descent(train_data, train_output, parameter_vector, :param relative_error_limit: Tolerance for error in training. It is relative to second parameter. :return: Optimized parameter vector. """ - j = 0 while True: - j += 1 cost_derivative = get_cost_derivative(train_data, train_output, parameter_vector) temp_parameter_vector = parameter_vector - \ learning_rate*cost_derivative @@ -136,8 +131,9 @@ def main(): learning_rate = 0.00015 absolute_error_limit = 0.000015 relative_error_limit = 0 - parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector, learning_rate, - absolute_error_limit, relative_error_limit) + parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector, + learning_rate, absolute_error_limit, + relative_error_limit) test_data = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=input_cols) num_data = len(test_data) biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose()) From 6da9dab384c1d457178d6f3aedcb22e1b81f18fb Mon Sep 17 00:00:00 2001 From: = <201551009@iiitvadodara.ac.in> Date: Tue, 27 Jun 2017 21:03:37 +0530 Subject: [PATCH 10/14] - Updated gradient_descent.py --- .../linear_regression/rock_aquifer_test.dat | 6 -- .../linear_regression/rock_aquifer_train.dat | 25 ------ gradient_descent/gradient_descent.py | 89 +++++++------------ 3 files changed, 33 insertions(+), 87 deletions(-) delete mode 100644 .datasets/linear_regression/rock_aquifer_test.dat delete mode 100644 .datasets/linear_regression/rock_aquifer_train.dat diff --git a/.datasets/linear_regression/rock_aquifer_test.dat b/.datasets/linear_regression/rock_aquifer_test.dat deleted file mode 100644 index e5a030cd..00000000 --- a/.datasets/linear_regression/rock_aquifer_test.dat +++ /dev/null @@ -1,6 +0,0 @@ - 4 2.7760 0.74 -1.6989 2.82 97.18 11.09 0.991 11.46 0.936 1.64 95.54 - 4 2.7467 0.68 -2.0000 14.09 85.91 11.84 1.289 12.14 1.187 1.79 84.15 - 4 2.7691 1.70 -1.6989 8.60 91.40 10.25 1.169 10.70 1.180 1.89 89.50 - 4 2.7176 2.70 -1.5228 10.05 89.95 11.11 0.669 11.48 0.788 1.97 88.51 - 4 2.6685 2.90 -1.6989 22.08 77.92 11.81 1.152 12.13 1.128 1.74 76.17 - 4 2.8425 1.80 -1.6989 16.39 83.61 12.07 1.361 12.21 1.156 1.53 82.04 \ No newline at end of file diff --git a/.datasets/linear_regression/rock_aquifer_train.dat b/.datasets/linear_regression/rock_aquifer_train.dat deleted file mode 100644 index b528f4ec..00000000 --- a/.datasets/linear_regression/rock_aquifer_train.dat +++ /dev/null @@ -1,25 +0,0 @@ - 1 2.6975 0.83 -1.2218 4.43 95.57 11.88 1.563 12.19 1.379 74.44 21.08 - 1 2.6415 0.59 -1.6989 7.19 92.81 10.31 1.410 10.85 1.361 85.80 7.00 - 1 2.6457 0.67 -1.6989 2.17 97.83 12.51 1.690 12.92 1.759 93.71 4.13 - 1 2.6342 0.50 -1.6989 6.34 93.66 10.76 1.327 11.40 1.361 95.55 2.17 - 1 2.6236 1.10 -1.2218 2.24 97.76 8.62 1.560 8.92 1.427 77.38 20.41 - 2 2.7278 1.10 -1.6989 8.85 91.16 9.85 0.848 10.25 0.815 1.22 89.93 - 2 2.7480 2.80 -1.2218 7.72 92.28 9.60 1.045 9.85 1.052 2.02 90.26 - 2 2.6678 4.60 -1.7213 8.18 91.82 8.75 0.999 9.10 0.879 1.56 75.38 - 2 2.6949 3.50 -1.5228 5.41 94.59 9.93 0.910 10.24 0.848 1.27 93.28 - 2 2.7020 8.60 -1.3010 19.25 80.75 10.00 0.850 10.54 0.774 1.43 79.32 - 2 2.7360 0.81 -1.6989 15.60 84.40 11.41 0.826 11.76 0.840 1.52 82.84 - 2 2.7420 1.40 -2.0000 16.78 83.22 11.49 0.618 11.87 0.619 1.62 81.62 - 2 2.6832 2.70 -1.6989 26.29 73.71 10.79 0.747 11.13 0.688 2.36 71.35 - 3 2.7504 1.20 -1.5229 4.63 95.37 10.43 0.888 10.80 0.962 7.21 88.16 - 3 2.7194 0.91 -1.6989 23.55 76.45 11.56 0.681 11.92 0.675 1.99 74.46 - 3 2.6247 1.40 -1.3979 2.56 97.44 10.50 0.878 10.82 0.806 89.73 7.71 - 3 2.7223 3.00 -1.3010 10.54 89.46 10.74 0.942 11.08 0.711 92.19 7.27 - 3 2.6185 1.30 -1.5228 7.06 92.94 11.41 0.784 11.83 0.830 89.56 3.37 - 3 2.6395 0.18 -2.0458 3.10 96.90 12.12 0.804 12.61 0.918 91.76 5.18 - 3 2.6252 5.40 -1.6989 18.83 81.17 11.59 0.904 11.96 0.805 5.09 76.07 - 3 2.6531 0.30 -2.3010 12.65 87.35 11.51 0.849 11.85 0.749 54.47 32.87 - 3 2.6204 0.56 -1.6989 5.88 94.12 11.32 0.861 11.72 0.811 91.32 2.80 - 4 2.7691 0.93 -2.0000 2.92 97.08 10.57 0.640 10.97 0.587 1.42 95.65 - 4 2.7899 0.23 -1.6989 2.11 97.89 11.55 0.759 11.80 0.741 4.82 93.05 - 4 2.7602 1.50 -1.6989 3.87 96.13 10.35 1.367 10.66 1.211 1.92 94.20 \ No newline at end of file diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 0fc8df00..5db90ef5 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -2,7 +2,6 @@ Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function. """ import numpy -from os.path import expanduser import matplotlib.pyplot as plt @@ -11,7 +10,7 @@ def _error(input_data, output_data, parameter_vector): :param input_data: Data whose summation of cost derivative has to be calculated :param output_data: Output corresponding to training data :param parameter_vector: Weight vector - :return: + :return: Error in hypothesis value """ return _hypothesis_value(input_data, parameter_vector) - output_data @@ -26,29 +25,17 @@ def _hypothesis_value(data, parameter_vector): return numpy.asmatrix(numpy.dot(data, parameter_vector)) -def summation_of_cost_derivative(input_data, output_data, parameter_vector): +def get_cost_derivative(input_data, output_data, parameter_vector): """ - Calculates the sum of cost function derivative - :param input_data: Data whose summation of cost derivative has to be calculated - :param output_data: Output corresponding to training data - :param parameter_vector: Weight vector - - :return: Returns the summation of cost derivative - """ - return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector)) - -def get_cost_derivative(train_data, train_output, parameter_vector): - """ - - :param train_data: Training data - :param train_output: Output corresponding to training data + :param input_data: Training or testing data + :param output_data: Output corresponding to training data :param parameter_vector: Weight vector :return: derivative vector """ - train_data_size = len(train_data) - return summation_of_cost_derivative(train_data, train_output, - parameter_vector)/train_data_size + train_data_size = len(input_data) + return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))\ + / train_data_size def run_gradient_descent(train_data, train_output, parameter_vector, @@ -77,22 +64,23 @@ def run_gradient_descent(train_data, train_output, parameter_vector, return parameter_vector -def test_gradient_descent(test_data, test_output, parameter_vector): +def test_gradient_descent(input_data, output_data, parameter_vector): """ - :param test_data: Input data to be tested - :param test_output: Actual Output data for Input dataset + :param input_data: Input data to be tested + :param output_data: Actual Output data for Input dataset :param parameter_vector: Weight vector after optimized by using gradient descent :return: None """ - actual_output = test_output - hypothesis_output = _hypothesis_value(test_data, + actual_output = output_data + hypothesis_output = _hypothesis_value(input_data, parameter_vector=parameter_vector) - num_examples = len(test_output) - plt.stem(range(num_examples), actual_output, markerfmt='go', label='Actual Output') - plt.stem(range(num_examples), hypothesis_output, label='Hypothesis Output') - plt.xlabel('Test case') + num_examples = len(output_data) + plt.plot(range(num_examples), actual_output, 'r', label='Actual Output') + plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output') + plt.xlabel('Test example') plt.ylabel('Output Values') - plt.xlim([-1, 7]) + plt.xlim([-1, len(input_data) + 2]) + plt.ylim([-5, 200]) plt.legend() plt.show() @@ -102,44 +90,33 @@ def download_data(): Downloads test and train data from GitHub repository """ import requests - home = expanduser('~') - response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/' - 'linear_regression/rock_aquifer_train.dat') - if response: - with open(home+'/rock_aquifer_train.dat', 'wb') as f: - f.write(response.text) - response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/' - 'linear_regression/rock_aquifer_test.dat') - if response: - with open(home + '/rock_aquifer_test.dat', 'wb') as f: - f.write(response.text) + response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat') + train_data = [] + train_output = [] + data_matrix = response.text.split('\n') + for data_tuple in data_matrix: + data_tuple = data_tuple.split() + if data_tuple: + train_data.append(data_tuple[:11]) + train_output.append(data_tuple[-1]) + return numpy.asmatrix(train_data).astype(dtype='float'), \ + numpy.asmatrix(train_output).astype(dtype='float') def main(): - download_data() - home = expanduser('~') - input_cols = list(range(11)) - train_data = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat', - usecols=input_cols)) + train_data, train_output = download_data() num_data = len(train_data) biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose()) train_data = numpy.column_stack((biased_tuple, train_data)) - output_cols = (11,) - train_output = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat', - usecols=output_cols)).transpose() + train_output = train_output.transpose() parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose() learning_rate = 0.00015 - absolute_error_limit = 0.000015 + absolute_error_limit = 0.000015/4 relative_error_limit = 0 parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector, learning_rate, absolute_error_limit, relative_error_limit) - test_data = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=input_cols) - num_data = len(test_data) - biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose()) - test_data = numpy.column_stack((biased_tuple, test_data)) - test_output = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=output_cols) - test_gradient_descent(test_data, test_output, parameter_vector=parameter_vector) + test_gradient_descent(train_data, train_output, parameter_vector=parameter_vector) if __name__ == '__main__': From c3d615004a4182f65e8ac42a685e7e298ff527c1 Mon Sep 17 00:00:00 2001 From: Prakash Rai <201551009@iiitvadodara.ac.in> Date: Wed, 28 Jun 2017 00:56:58 +0530 Subject: [PATCH 11/14] Update gradient_descent.py --- gradient_descent/gradient_descent.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 5db90ef5..25f89dea 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -2,6 +2,7 @@ Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function. """ import numpy +import requests import matplotlib.pyplot as plt @@ -27,7 +28,6 @@ def _hypothesis_value(data, parameter_vector): def get_cost_derivative(input_data, output_data, parameter_vector): """ - :param input_data: Training or testing data :param output_data: Output corresponding to training data :param parameter_vector: Weight vector @@ -89,7 +89,6 @@ def download_data(): """ Downloads test and train data from GitHub repository """ - import requests response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat') train_data = [] train_output = [] @@ -111,7 +110,7 @@ def main(): train_output = train_output.transpose() parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose() learning_rate = 0.00015 - absolute_error_limit = 0.000015/4 + absolute_error_limit = 0.000015 relative_error_limit = 0 parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector, learning_rate, absolute_error_limit, From 3802026cfd8e0bd0827e763d3136191a6cee78df Mon Sep 17 00:00:00 2001 From: = <201551009@iiitvadodara.ac.in> Date: Wed, 28 Jun 2017 23:05:15 +0530 Subject: [PATCH 12/14] - Graph is shown when sys argument is passed. - Added matplotlib in python3 requirements.txt --- gradient_descent/gradient_descent.py | 23 ++++++++++++++--------- pip3-requirements.txt | 1 + 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 25f89dea..7f9144eb 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -1,9 +1,12 @@ """ +To view the plot, run as follows: +python3 gradient_descent.py show-plot Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function. """ import numpy import requests import matplotlib.pyplot as plt +import sys def _error(input_data, output_data, parameter_vector): @@ -74,15 +77,17 @@ def test_gradient_descent(input_data, output_data, parameter_vector): actual_output = output_data hypothesis_output = _hypothesis_value(input_data, parameter_vector=parameter_vector) - num_examples = len(output_data) - plt.plot(range(num_examples), actual_output, 'r', label='Actual Output') - plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output') - plt.xlabel('Test example') - plt.ylabel('Output Values') - plt.xlim([-1, len(input_data) + 2]) - plt.ylim([-5, 200]) - plt.legend() - plt.show() + if len(sys.argv) == 2: + if sys.argv[1] == 'show-plot': + num_examples = len(output_data) + plt.plot(range(num_examples), actual_output, 'r', label='Actual Output') + plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output') + plt.xlabel('Test example') + plt.ylabel('Output Values') + plt.xlim([-1, len(input_data) + 2]) + plt.ylim([-5, 200]) + plt.legend() + plt.show() def download_data(): diff --git a/pip3-requirements.txt b/pip3-requirements.txt index 6a58861d..de48800f 100644 --- a/pip3-requirements.txt +++ b/pip3-requirements.txt @@ -1,3 +1,4 @@ coala-bears numpy requests +matplotlib From 7a0b0c958eb26c34f7c0f98cb2418196e9987add Mon Sep 17 00:00:00 2001 From: Prakash Rai <201551009@iiitvadodara.ac.in> Date: Wed, 28 Jun 2017 23:17:06 +0530 Subject: [PATCH 13/14] Update gradient_descent.py --- gradient_descent/gradient_descent.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/gradient_descent/gradient_descent.py b/gradient_descent/gradient_descent.py index 7f9144eb..85dc7143 100644 --- a/gradient_descent/gradient_descent.py +++ b/gradient_descent/gradient_descent.py @@ -57,7 +57,8 @@ def run_gradient_descent(train_data, train_output, parameter_vector, :return: Optimized parameter vector. """ while True: - cost_derivative = get_cost_derivative(train_data, train_output, parameter_vector) + cost_derivative = get_cost_derivative( + train_data, train_output, parameter_vector) temp_parameter_vector = parameter_vector - \ learning_rate*cost_derivative if numpy.allclose(parameter_vector, temp_parameter_vector, @@ -80,8 +81,10 @@ def test_gradient_descent(input_data, output_data, parameter_vector): if len(sys.argv) == 2: if sys.argv[1] == 'show-plot': num_examples = len(output_data) - plt.plot(range(num_examples), actual_output, 'r', label='Actual Output') - plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output') + plt.plot(range(num_examples), actual_output, + 'r', label='Actual Output') + plt.plot(range(num_examples), hypothesis_output, + 'g', label='Hypothesis Output') plt.xlabel('Test example') plt.ylabel('Output Values') plt.xlim([-1, len(input_data) + 2]) @@ -94,7 +97,8 @@ def download_data(): """ Downloads test and train data from GitHub repository """ - response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat') + response = requests.get( + 'http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat') train_data = [] train_output = [] data_matrix = response.text.split('\n') @@ -110,17 +114,20 @@ def download_data(): def main(): train_data, train_output = download_data() num_data = len(train_data) - biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose()) + biased_tuple = numpy.asmatrix(numpy.ones( + (1, num_data), dtype=float).transpose()) train_data = numpy.column_stack((biased_tuple, train_data)) train_output = train_output.transpose() - parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose() + parameter_vector = numpy.asmatrix( + [2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose() learning_rate = 0.00015 absolute_error_limit = 0.000015 relative_error_limit = 0 parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector, learning_rate, absolute_error_limit, relative_error_limit) - test_gradient_descent(train_data, train_output, parameter_vector=parameter_vector) + test_gradient_descent(train_data, train_output, + parameter_vector=parameter_vector) if __name__ == '__main__': From 5eea0f978489c93540b3a510952f71a74e8ef854 Mon Sep 17 00:00:00 2001 From: Prakash Rai <201551009@iiitvadodara.ac.in> Date: Wed, 28 Jun 2017 23:33:34 +0530 Subject: [PATCH 14/14] Update pip2-requirements.txt --- pip2-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/pip2-requirements.txt b/pip2-requirements.txt index 945b4703..3dc5e338 100644 --- a/pip2-requirements.txt +++ b/pip2-requirements.txt @@ -1,2 +1,3 @@ numpy requests +matplotlib