Added Gradient Descent [Python] #348

Closed · wants to merge 22 commits
Changes from 1 commit
6 changes: 0 additions & 6 deletions .datasets/linear_regression/rock_aquifer_test.dat

This file was deleted.

25 changes: 0 additions & 25 deletions .datasets/linear_regression/rock_aquifer_train.dat

This file was deleted.

89 changes: 33 additions & 56 deletions gradient_descent/gradient_descent.py
@@ -2,7 +2,6 @@
 Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
 """
 import numpy
-from os.path import expanduser
 import matplotlib.pyplot as plt


@@ -11,7 +10,7 @@ def _error(input_data, output_data, parameter_vector):
     :param input_data: Data whose summation of cost derivative has to be calculated
     :param output_data: Output corresponding to training data
     :param parameter_vector: Weight vector
-    :return:
+    :return: Error in hypothesis value
     """
     return _hypothesis_value(input_data, parameter_vector) - output_data

Expand All @@ -26,29 +25,17 @@ def _hypothesis_value(data, parameter_vector):
return numpy.asmatrix(numpy.dot(data, parameter_vector))


def summation_of_cost_derivative(input_data, output_data, parameter_vector):
def get_cost_derivative(input_data, output_data, parameter_vector):
"""
Calculates the sum of cost function derivative
:param input_data: Data whose summation of cost derivative has to be calculated
:param output_data: Output corresponding to training data
:param parameter_vector: Weight vector

:return: Returns the summation of cost derivative
"""
return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))


def get_cost_derivative(train_data, train_output, parameter_vector):
"""

:param train_data: Training data
:param train_output: Output corresponding to training data
:param input_data: Training or testing data
:param output_data: Output corresponding to training data
:param parameter_vector: Weight vector
:return: derivative vector
"""
train_data_size = len(train_data)
return summation_of_cost_derivative(train_data, train_output,
parameter_vector)/train_data_size
train_data_size = len(input_data)
return numpy.dot(input_data.transpose(), _error(input_data, output_data, parameter_vector))\
/ train_data_size
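For readers tracing this refactor: the two helpers are merged so that the summed cost derivative is divided by the number of examples in a single expression. Assuming the standard mean-squared-error cost $J(\theta) = \frac{1}{2m}\lVert X\theta - y \rVert^2$ (the PR never writes the cost function down explicitly), the returned vector is the usual gradient

\[ \frac{\partial J}{\partial \theta} = \frac{1}{m} X^{\top}(X\theta - y), \]

where $X$ is input_data, $y$ is output_data, $\theta$ is parameter_vector, and $m$ is train_data_size.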


@@ -77,22 +64,23 @@ def run_gradient_descent(train_data, train_output, parameter_vector,
     return parameter_vector


-def test_gradient_descent(test_data, test_output, parameter_vector):
+def test_gradient_descent(input_data, output_data, parameter_vector):
     """
-    :param test_data: Input data to be tested
-    :param test_output: Actual Output data for Input dataset
+    :param input_data: Input data to be tested
+    :param output_data: Actual Output data for Input dataset
     :param parameter_vector: Weight vector after optimized by using gradient descent
     :return: None
     """
-    actual_output = test_output
-    hypothesis_output = _hypothesis_value(test_data,
+    actual_output = output_data
+    hypothesis_output = _hypothesis_value(input_data,
                                           parameter_vector=parameter_vector)
-    num_examples = len(test_output)
-    plt.stem(range(num_examples), actual_output, markerfmt='go', label='Actual Output')
-    plt.stem(range(num_examples), hypothesis_output, label='Hypothesis Output')
-    plt.xlabel('Test case')
+    num_examples = len(output_data)
+    plt.plot(range(num_examples), actual_output, 'r', label='Actual Output')
Review comment (Member): Check the arguments passed for plotting the graph.
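One possible reading of this comment (an assumption; the thread does not elaborate): actual_output and hypothesis_output are numpy matrices rather than flat arrays, and matplotlib handles 1-D arrays more predictably. A minimal sketch of that fix:

    plt.plot(range(num_examples), numpy.asarray(actual_output).ravel(),
             'r', label='Actual Output')
    plt.plot(range(num_examples), numpy.asarray(hypothesis_output).ravel(),
             'g', label='Hypothesis Output')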

+    plt.plot(range(num_examples), hypothesis_output, 'g', label='Hypothesis Output')
+    plt.xlabel('Test example')
     plt.ylabel('Output Values')
-    plt.xlim([-1, 7])
+    plt.xlim([-1, len(input_data) + 2])
     plt.ylim([-5, 200])
     plt.legend()
     plt.show()
Review comment (@yashLadha, Member · Jun 27, 2017): Won't the program fail to end if you haven't closed the plot explicitly? This may keep the Travis CI build from completing. Add a timer or something so that the plot gets closed.
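A minimal sketch of the timer idea suggested above, assuming matplotlib's canvas-timer API; the helper name show_plot_briefly is hypothetical and not part of this PR:

    def show_plot_briefly(seconds=3):
        """Show the current figure, then close it so the process can exit."""
        figure = plt.gcf()
        timer = figure.canvas.new_timer(interval=int(seconds * 1000))
        timer.single_shot = True              # fire once, not repeatedly
        timer.add_callback(plt.close, figure)  # close the figure when it fires
        timer.start()
        plt.show()                             # returns once the figure is closed

Calling this in place of the bare plt.show() would let a CI run such as Travis finish without manual interaction.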


@@ -102,44 +90,33 @@ def download_data():
     Downloads test and train data from GitHub repository
     """
     import requests
Review comment (Member): Put the import where all the other declarations are defined.
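Concretely, the suggestion is to hoist the import to module level beside the existing ones, e.g.:

    import numpy
    import requests  # moved out of download_data(), per the review
    import matplotlib.pyplot as plt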

-    home = expanduser('~')
-    response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/'
-                            'linear_regression/rock_aquifer_train.dat')
-    if response:
-        with open(home+'/rock_aquifer_train.dat', 'wb') as f:
-            f.write(response.text)
-    response = requests.get('https://github.com/iiitv/algos/blob/master/.datasets/'
-                            'linear_regression/rock_aquifer_test.dat')
-    if response:
-        with open(home + '/rock_aquifer_test.dat', 'wb') as f:
-            f.write(response.text)
+    response = requests.get('http://www.stat.ufl.edu/~winner/data/rock_aquifer.dat')
+    train_data = []
+    train_output = []
+    data_matrix = response.text.split('\n')
+    for data_tuple in data_matrix:
+        data_tuple = data_tuple.split()
+        if data_tuple:
+            train_data.append(data_tuple[:11])
+            train_output.append(data_tuple[-1])
+    return numpy.asmatrix(train_data).astype(dtype='float'), \
+        numpy.asmatrix(train_output).astype(dtype='float')


 def main():
-    download_data()
-    home = expanduser('~')
-    input_cols = list(range(11))
-    train_data = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat',
-                                              usecols=input_cols))
+    train_data, train_output = download_data()
     num_data = len(train_data)
     biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
     train_data = numpy.column_stack((biased_tuple, train_data))
-    output_cols = (11,)
-    train_output = numpy.asmatrix(numpy.loadtxt(home + '/Documents/rock_aquifer_train.dat',
-                                                usecols=output_cols)).transpose()
+    train_output = train_output.transpose()
     parameter_vector = numpy.asmatrix([2, 4, 1, 5, 4, 1, 2, 2, 3, 1, 1, 2]).transpose()
     learning_rate = 0.00015
-    absolute_error_limit = 0.000015
+    absolute_error_limit = 0.000015/4
Review comment (@yashLadha, Member · Jun 27, 2017): Why is this so? Why are you not using the simplified version?
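For reference, 0.000015/4 evaluates to 3.75e-06, so the simplified version the reviewer presumably has in mind is the constant written out directly:

    absolute_error_limit = 3.75e-06  # same value as 0.000015/4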

     relative_error_limit = 0
     parameter_vector = run_gradient_descent(train_data, train_output, parameter_vector,
                                             learning_rate, absolute_error_limit,
                                             relative_error_limit)
-    test_data = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=input_cols)
-    num_data = len(test_data)
-    biased_tuple = numpy.asmatrix(numpy.ones((1, num_data), dtype=float).transpose())
-    test_data = numpy.column_stack((biased_tuple, test_data))
-    test_output = numpy.loadtxt(home + '/Documents/rock_aquifer_test.dat', usecols=output_cols)
-    test_gradient_descent(test_data, test_output, parameter_vector=parameter_vector)
+    test_gradient_descent(train_data, train_output, parameter_vector=parameter_vector)


 if __name__ == '__main__':