Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2014-Group12 #2

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added 2014-Group12/User Manual.pdf
Binary file not shown.
117,687 changes: 117,687 additions & 0 deletions 2014-Group12/code/SentiWordNet_3.0.0_20130122.txt

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions 2014-Group12/code/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from model_feature_creator import create_feature_vector
from model_feature_creator import create_feature_matrix
from model_feature_creator import feature_vector_meaning

from sklearn import metrics
from sklearn import preprocessing
from sklearn import cross_validation
from sklearn.svm import SVC
import sklearn.linear_model as lm

import numpy as np

# Classifiers
def direction_classifier(x,y):
    """Fit a default-configured SVC on the given samples and labels.

    x: feature matrix passed straight to SVC.fit
    y: target labels passed straight to SVC.fit

    Returns the fitted SVC instance.
    """
    model = SVC()
    model.fit(x, y)
    return model

def change_classifier(x,y):
    """Fit an ordinary LinearRegression model on the given samples and targets.

    x: feature matrix passed straight to LinearRegression.fit
    y: target values passed straight to LinearRegression.fit

    Returns the fitted LinearRegression instance.
    """
    model = lm.LinearRegression()
    model.fit(x, y)
    return model

# Experiments
def experiment_zero(data,company):
    """Build a feature matrix for one company, fit a model and print its
    cross-validated score.

    data: rows of raw daily fields, as produced by read_data
    company: String = Name of the company, forwarded to the feature creator

    All experiment parameters are fixed inside the function. Nothing is
    returned; settings and scores are printed to stdout.
    """
    # NOTE(review): the banner says "One" although this is experiment_zero;
    # the text is left unchanged in case existing logs are compared against it.
    print('___Experiment One___')
    # Experiment Parameters
    finance_datatype = 0    # Integer 2 = Stock price change, 1 = Percentage stock price change, 0 = Only direction
    finance_n = 2           # Integer >=0 Number of days of finance data to include
    sentiment_datatype = 1  # Boolean 1 = all sentiment features, 0 = Total
    sentiment_n = 1         # Integer >=0 Number of days of sentiment data to include
    day = 0                 # Boolean 1 = Include day of the week, 0 = do not
    target = 0              # Boolean 1 = Amount, 0 = Direction
    volume = 0              # Boolean 1 = Yes, 0 = No
    if (finance_n + sentiment_n + day + volume) == 0:
        print('Insufficient parameters set')
        return

    # Data Processing
    feature_vector_meaning(company, finance_datatype, finance_n, sentiment_datatype, sentiment_n, day, target, volume)
    matrix = create_feature_matrix(company, data, finance_datatype, finance_n, sentiment_datatype, sentiment_n, day, target, volume)
    # The target is the last column of every row; everything before it is a feature.
    train_x = matrix[:, :-1]
    train_y = matrix[:, -1]

    # Classifier training
    # NOTE(review): the scaler is fitted on all rows before the
    # cross-validation split below, so held-out folds influence the scaling.
    scaler = preprocessing.StandardScaler().fit(train_x)
    train_x = scaler.transform(train_x)

    # Use the regression model when predicting an amount; the SVC is only
    # appropriate for the up/down direction labels.
    if target == 1:
        clf = change_classifier(train_x, train_y)
    else:
        clf = direction_classifier(train_x, train_y)

    cv = cross_validation.ShuffleSplit(len(train_x), n_iter=5, test_size=0.2, random_state=0)
    print(' _ _ _Evaluation_ _ _')
    if target == 0:
        scores = cross_validation.cross_val_score(clf, train_x, train_y, cv=cv, scoring='accuracy')
        print(" Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    elif target == 1:
        scores = cross_validation.cross_val_score(clf, train_x, train_y, cv=cv, scoring='mean_squared_error')
        print(" MSE: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print('=====================')

# Read Data
def read_data(filename):
    """Load a tab-separated text file, dropping its first (header) line.

    filename: path of the file to read

    Returns a list with one entry per remaining line; each entry is the list
    of that line's tab-separated fields (as strings, newline removed).
    """
    with open(filename) as handle:
        lines = handle.readlines()

    # lines[0] is the header row and is never parsed.
    return [row.replace('\n', '').split('\t') for row in lines[1:]]

if __name__ == '__main__':
    # Script entry point: load the per-company daily files and run the
    # experiment on each. Single-argument print(...) behaves the same on
    # Python 2 and Python 3.
    print('Running Model.py - Creates classifiers and runs experiments')
    print('=====================')
    mcdonalds = read_data('./../output/mcdonalds_daily.txt')
    jpmorgan = read_data('./../output/jpmorgan_daily.txt')
    # microsoft = read_data('./../output/microsoft_daily.txt')

    experiment_zero(jpmorgan, 'jpmorgan')
    experiment_zero(mcdonalds, 'mcdonalds')
    # experiment_one(microsoft, 'microsoft')
149 changes: 149 additions & 0 deletions 2014-Group12/code/model_feature_creator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import model_stocks as stocks
import numpy as np

# Ordered weekday labels (Mon-Fri only), one row per week. A sample's date is
# looked up in this list and neighbouring entries are used as the previous /
# next day.
days = [
    'Mon Jan 13', 'Tue Jan 14', 'Wed Jan 15', 'Thu Jan 16', 'Fri Jan 17',
    'Mon Jan 20', 'Tue Jan 21', 'Wed Jan 22', 'Thu Jan 23', 'Fri Jan 24',
    'Mon Jan 27', 'Tue Jan 28', 'Wed Jan 29', 'Thu Jan 30', 'Fri Jan 31',
    'Mon Feb 03', 'Tue Feb 04', 'Wed Feb 05', 'Thu Feb 06', 'Fri Feb 07',
    'Mon Feb 10', 'Tue Feb 11', 'Wed Feb 12', 'Thu Feb 13', 'Fri Feb 14',
    'Mon Feb 17', 'Tue Feb 18', 'Wed Feb 19', 'Thu Feb 20', 'Fri Feb 21',
    'Mon Feb 24', 'Tue Feb 25', 'Wed Feb 26', 'Thu Feb 27', 'Fri Feb 28',
    'Mon Mar 03', 'Tue Mar 04', 'Wed Mar 05', 'Thu Mar 06', 'Fri Mar 07',
]
# Weekday abbreviations; the position in this list is the one-hot index.
weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri']

def create_feature_vector(company, raw_vector, finance_datatype, finance_n, sentiment_datatype, sentiment_n, day, target,volume):
    """Turn one raw daily row into a numeric vector: features first, target last.

    company: String = Name of the company ('jpmorgan', 'microsoft' or 'mcdonalds')
    raw_vector: sequence of fields for one day. [0] is the date (must be an
        entry of `days`), [1] the weekday abbreviation, [2:-2] the individual
        sentiment values, [-2] the total sentiment score, [-1] the volume.
    finance_datatype: Integer 2 = Stock price change, 1 = Percentage stock price change, 0 = Only direction
    finance_n: Integer >=0 Number of days of finance data to include
    sentiment_datatype: Boolean 1 = all sentiment features, 0 = just end
    sentiment_n: Integer >=0 Number of days of sentiment data to include
        (only tested for being > 0; the current day's values are used)
    day: Boolean 1 = Include day of the week, 0 = do not
    target: Boolean 1 = Amount, 0 = Direction
    volume: boolean 1 = Yes, 0 = No

    Returns a 1-D float64 numpy array; string fields are converted to floats.

    Raises ValueError for an unknown company, or for an unsupported
    sentiment_datatype / target value. Raises IndexError when the date is the
    last entry of `days` (there is no following day to take the target from).
    """
    date = raw_vector[0]
    day_of_the_week = raw_vector[1]

    ## Sentiment values of the current day
    if sentiment_n > 0:
        if sentiment_datatype == 1:
            feature_vector = list(raw_vector[2:-2])
        elif sentiment_datatype == 0:
            feature_vector = [raw_vector[-2]]
        else:
            raise ValueError("sentiment_datatype must be 0 or 1, got %r" % (sentiment_datatype,))
    else:
        feature_vector = []

    if volume == 1:
        feature_vector.append(raw_vector[-1])

    # Only the attribute for the requested company is touched, so companies
    # missing from the stocks module do not break the others.
    if company not in ('jpmorgan', 'microsoft', 'mcdonalds'):
        raise ValueError("unknown company: %r" % (company,))
    stock_data = getattr(stocks, company)

    ## Append day of the week (one-hot over `weekdays`)
    if day == 1:
        day_vector = [0.0]*len(weekdays)
        day_vector[weekdays.index(day_of_the_week)] = 1.0
        feature_vector.extend(day_vector)

    ## Append financial data
    # presumably each stock entry is an (opening, closing) price pair - confirm
    # against model_stocks.
    # NOTE(review): for the first dates in `days`, index_today - i becomes
    # negative and the lookup wraps around to the END of `days`, i.e. it reads
    # a later day instead of failing.
    index_today = days.index(date)
    for i in range(finance_n):
        previous_stock = stock_data.get(days[index_today-i])
        if finance_datatype == 2:       # Stock price change
            feature_vector.append(previous_stock[1] - previous_stock[0])
        elif finance_datatype == 1:     # Percent stock change
            # NOTE(review): under Python 2 this is integer division if both
            # prices are ints - confirm the stock data holds floats.
            feature_vector.append(100*(previous_stock[1]/previous_stock[0]))
        elif finance_datatype == 0:     # Direction: 1 = up or flat, -1 = down
            if previous_stock[1] - previous_stock[0] >= 0:
                feature_vector.append(1)
            else:
                feature_vector.append(-1)

    ## Target: taken from the next entry in `days`
    tomorrows_stock = stock_data.get(days[index_today+1])
    if target == 1:     # Amount
        target_value = 100*(tomorrows_stock[1]/tomorrows_stock[0])
    elif target == 0:   # Direction: 1.0 = up or flat, -1.0 = down
        if tomorrows_stock[1] - tomorrows_stock[0] >= 0:
            target_value = 1.0
        else:
            target_value = -1.0
    else:
        raise ValueError("target must be 0 or 1, got %r" % (target,))
    feature_vector.append(target_value)

    # np.float_ was removed in NumPy 2.0; an explicit float64 conversion gives
    # the same array on every NumPy version.
    return np.asarray(feature_vector, dtype=np.float64)

def create_feature_matrix(company, data, finance_datatype, finance_n, sentiment_datatype, sentiment_n, day, target, volume):
    """Build the sample matrix for a whole data set, one row per raw line.

    company: String = Name of the company
    data: iterable of raw daily rows, each passed to create_feature_vector
    The remaining parameters are forwarded unchanged to create_feature_vector.

    Returns a 2-D numpy array whose rows are the vectors produced by
    create_feature_vector, in input order. For empty `data` a plain empty
    list is returned (unchanged from the previous behaviour).
    """
    rows = [
        create_feature_vector(company, line, finance_datatype, finance_n, sentiment_datatype, sentiment_n, day, target, volume)
        for line in data
    ]
    if not rows:
        return []

    # Stack once at the end instead of re-copying the growing matrix for
    # every appended row.
    return np.vstack(rows)

def feature_vector_meaning(company, finance_datatype, finance_n, sentiment_datatype, sentiment_n, day, target, volume):
    """Print a human-readable summary of the feature settings to stdout.

    company: String = Name of the company (printed as given)
    finance_datatype: Integer 2 = Stock price change, 1 = Percentage stock price change, 0 = Only direction
    finance_n: Integer; > 0 means financial data is used
    sentiment_datatype: Boolean 1 = all sentiment values, 0 = just total score
        (any other value prints no datatype line)
    sentiment_n: Integer >=0 Number of days of sentiment data
    day: Boolean 1 = day of the week is a feature, 0 = it is not
    target: Boolean 1 = Amount (percentage), 0 = Direction
    volume: boolean 1 = Yes, 0 = No

    Returns None. Raises ValueError when target, finance_datatype (with
    finance_n > 0), sentiment_n, day or volume has an unsupported value.
    """
    print(' _ _ _Settings_ _ _')
    print(' Company: `%s\'' % company)

    if target == 0:
        predicting = "Direction of the stock price change"
    elif target == 1:
        predicting = "Percentage movement of the stock price"
    else:
        raise ValueError("target must be 0 or 1, got %r" % (target,))
    print(' Predicting: %s' % predicting)

    if finance_n > 0:
        if finance_datatype == 2:
            finance_type = "Real stock price change"
        elif finance_datatype == 1:
            finance_type = "Percentage stock price change"
        elif finance_datatype == 0:
            finance_type = "Direction of the stock price change"
        else:
            raise ValueError("finance_datatype must be 0, 1 or 2, got %r" % (finance_datatype,))
        print(' Financial Data: %s' % ("Using %d days of financial data" % finance_n))
        print(' Financial Datatype: %s' % finance_type)
    else:
        print(' Financial Data: %s' % "No financial features")

    if sentiment_n == 0:
        sentiment_data = "No sentiment features"
    elif sentiment_n > 0:
        sentiment_data = "Using %d days of sentiment data" % sentiment_n
    else:
        raise ValueError("sentiment_n must be >= 0, got %r" % (sentiment_n,))
    print(' Sentiment Data: %s' % sentiment_data)
    # The datatype line is printed regardless of sentiment_n.
    if sentiment_datatype == 1:
        print(' Sentiment Datatype: %s' % "All values")
    elif sentiment_datatype == 0:
        print(' Sentiment Datatype: %s' % "Just total score")

    if day == 1:
        day_feature = "Using day of the week as a feature"
    elif day == 0:
        day_feature = "No day of the week feature"
    else:
        raise ValueError("day must be 0 or 1, got %r" % (day,))

    if volume == 1:
        volume_feature = "Using volume as a feature"
    elif volume == 0:
        volume_feature = "No volume feature"
    else:
        raise ValueError("volume must be 0 or 1, got %r" % (volume,))

    print(' Day Feature: %s' % day_feature)
    print(' Volume Feature: %s' % volume_feature)

if __name__ == '__main__':
    # Smoke test: build the vector for one sample day with every optional
    # feature switched off, so only the target value is produced.
    # The fields are listed explicitly because the date itself contains
    # spaces and the row must not depend on literal tab characters surviving
    # in this source file.
    raw_vector = ['Fri Feb 14', 'Fri', '0.9', '4.75', '76.349', '132.361464937', '826.0']

    print(create_feature_vector('jpmorgan', raw_vector, 0, 0, 0, 0, 0, 0 ,0))
Loading