Added Gradient Descent [Python] #348
Changes from 1 commit
```diff
@@ -2,7 +2,6 @@
 Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
 """
 import numpy
-from os.path import expanduser
 import matplotlib.pyplot as plt
```
```diff
@@ -11,7 +10,7 @@ def _error(input_data, output_data, parameter_vector):
     :param input_data: Data whose summation of cost derivative has to be calculated
     :param output_data: Output corresponding to training data
     :param parameter_vector: Weight vector
-    :return:
+    :return: Error in hypothesis value
     """
     return _hypothesis_value(input_data, parameter_vector) - output_data
```
```diff
@@ -26,29 +25,17 @@ def _hypothesis_value(data, parameter_vector):
     return numpy.asmatrix(numpy.dot(data, parameter_vector))
 
 
-def summation_of_cost_derivative(input_data, output_data, parameter_vector):
+def get_cost_derivative(input_data, output_data, parameter_vector):
     """
     Calculates the sum of cost function derivative
-    :param input_data: Data whose summation of cost derivative has to be calculated
-    :param output_data: Output corresponding to training data
-    :param parameter_vector: Weight vector
-
-    :return: Returns the summation of cost derivative
-    """
-    return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))
-
-
-def get_cost_derivative(train_data, train_output, parameter_vector):
-    """
-
-    :param train_data: Training data
-    :param train_output: Output corresponding to training data
+    :param input_data: Training or testing data
+    :param output_data: Output corresponding to training data
     :param parameter_vector: Weight vector
     :return: derivative vector
     """
-    train_data_size = len(train_data)
-    return summation_of_cost_derivative(train_data, train_output,
-                                        parameter_vector)/train_data_size
+    train_data_size = len(input_data)
+    return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))\
+        / train_data_size
 
 
 def run_gradient_descent(train_data, train_output, parameter_vector,
```
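For reference, the consolidated `get_cost_derivative` computes the batch gradient of the mean-squared-error cost for a linear hypothesis, `(1/m) * X^T (X*theta - y)`. A small self-contained check of that identity (the data values here are made up for illustration):

```python
import numpy

# Tiny made-up dataset: 3 examples, 2 features (bias column already included).
X = numpy.asmatrix([[1.0, 2.0], [1.0, 3.0], [1.0, 5.0]])
y = numpy.asmatrix([[4.0], [6.0], [10.0]])
theta = numpy.asmatrix([[0.5], [1.5]])

m = len(X)  # number of training examples, as len(input_data) in the diff
gradient = numpy.dot(X.transpose(), numpy.dot(X, theta) - y) / m
print(gradient)  # one partial derivative per parameter, same shape as theta
```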
```diff
@@ -77,22 +64,23 @@ def run_gradient_descent(train_data, train_output, parameter_vector,
     return parameter_vector
 
 
-def test_gradient_descent(test_data, test_output, parameter_vector):
+def test_gradient_descent(input_data, output_data, parameter_vector):
     """
-    :param test_data: Input data to be tested
-    :param test_output: Actual Output data for Input dataset
+    :param input_data: Input data to be tested
+    :param output_data: Actual Output data for Input dataset
     :param parameter_vector: Weight vector after optimized by using gradient descent
     :return: None
     """
-    actual_output = test_output
-    hypothesis_output = _hypothesis_value(test_data,
+    actual_output = output_data
+    hypothesis_output = _hypothesis_value(input_data,
                                           parameter_vector=parameter_vector)
-    num_examples = len(test_output)
-    plt.stem(range(num_examples), actual_output, markerfmt='go', label='Actual Output')
-    plt.stem(range(num_examples), hypothesis_output, label='Hypothesis Output')
-    plt.xlabel('Test case')
+    num_examples = len(output_data)
+    plt.plot(range(num_examples), actual_output, 'r', label='Actual Output')
+    plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output')
+    plt.xlabel('Test example')
     plt.ylabel('Output Values')
-    plt.xlim([-1, 7])
+    plt.xlim([-1, len(input_data) + 2])
     plt.ylim([-5, 200])
     plt.legend()
     plt.show()
```
Review comment on `plt.show()`: Won't the program end? If you haven't closed the plot explicitly, maybe this will hold the …
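As the reviewer notes, `plt.show()` blocks until the window is closed. A minimal non-blocking sketch (the `block` keyword and `plt.pause` are standard matplotlib; the 5-second pause is an arbitrary choice):

```python
import matplotlib.pyplot as plt

plt.show(block=False)  # render the window and return immediately
plt.pause(5)           # keep the GUI event loop alive briefly
plt.close('all')       # then release the figure so the script can exit

# Alternatively, skip the GUI entirely and write the figure to disk:
# plt.savefig('gradient_descent_fit.png')
```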
```diff
@@ -102,44 +90,33 @@ def download_data():
     Downloads test and train data from GitHub repository
     """
     import requests
```
Review comment on `import requests`: Put the import where all the declarations are defined.
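That is, hoist `requests` into the module-level import block at the top of the file, next to the existing imports:

```python
# Third-party imports, grouped at the top of the module
import numpy
import matplotlib.pyplot as plt
import requests
```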
```diff
-    home = expanduser('~')
-    response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/'
-                            'linear_regression/rock_aquifer_train.dat')
-    if response:
-        with open(home+'/rock_aquifer_train.dat', 'wb') as f:
-            f.write(response.text)
-    response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/'
-                            'linear_regression/rock_aquifer_test.dat')
-    if response:
-        with open(home + '/rock_aquifer_test.dat', 'wb') as f:
-            f.write(response.text)
+    response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat')
+    train_data = []
+    train_output = []
+    data_matrix = response.text.split('\n')
+    for data_tuple in data_matrix:
+        data_tuple = data_tuple.split()
+        if data_tuple:
+            train_data.append(data_tuple[:11])
+            train_output.append(data_tuple[-1])
+    return numpy.asmatrix(train_data).astype(dtype='float'), \
+           numpy.asmatrix(train_output).astype(dtype='float')
```
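Note the new code parses `response.text` without checking the HTTP status first. A hedged hardening sketch, assuming the same URL and column layout (`raise_for_status` is standard `requests`; routing the text through `io.StringIO` into `numpy.loadtxt` is an alternative to the manual split, not what the PR does):

```python
import io

import numpy
import requests


def download_data():
    """Download the rock aquifer dataset and split it into inputs and outputs."""
    response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat')
    response.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page
    matrix = numpy.loadtxt(io.StringIO(response.text))  # whitespace-separated floats
    return numpy.asmatrix(matrix[:, :11]), numpy.asmatrix(matrix[:, -1])
```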
```diff
 
 
 def main():
-    download_data()
-    home = expanduser('~')
-    input_cols = list(range(11))
-    train_data = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat',
-                                              usecols=input_cols))
+    train_data, train_output = download_data()
     num_data = len(train_data)
     biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
     train_data = numpy.column_stack((biased_tuple, train_data))
-    output_cols = (11,)
-    train_output = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat',
-                                                usecols=output_cols)).transpose()
+    train_output = train_output.transpose()
     parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose()
     learning_rate = 0.00015
-    absolute_error_limit = 0.000015
+    absolute_error_limit = 0.000015/4
```
Review comment on `absolute_error_limit = 0.000015/4`: Why is this so? Why are you not using the simplified version?
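If the simplified version the reviewer asks about just means writing the constant out, that would be:

```python
absolute_error_limit = 3.75e-06  # same value as 0.000015/4, stated directly
```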
```diff
     relative_error_limit = 0
     parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector,
                                             learning_rate, absolute_error_limit,
                                             relative_error_limit)
-    test_data = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=input_cols)
-    num_data = len(test_data)
-    biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
-    test_data = numpy.column_stack((biased_tuple, test_data))
-    test_output = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=output_cols)
-    test_gradient_descent(test_data, test_output, parameter_vector=parameter_vector)
+    test_gradient_descent(train_data, train_output, parameter_vector=parameter_vector)
 
 
 if __name__ == '__main__':
```
Review comment: Check for an argument passed for plotting the graph.
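A minimal sketch of what that check could look like, assuming a hypothetical `--plot` command-line flag (not part of this PR):

```python
import argparse


def parse_args():
    """Parse command-line options; plotting is opt-in via --plot."""
    parser = argparse.ArgumentParser(description='Gradient descent on the rock aquifer data')
    parser.add_argument('--plot', action='store_true',
                        help='show the actual-vs-hypothesis plot after training')
    return parser.parse_args()

# In main(), gate the blocking plot behind the flag:
#     args = parse_args()
#     if args.plot:
#         test_gradient_descent(train_data, train_output, parameter_vector=parameter_vector)
```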