-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprogram.py
39 lines (29 loc) · 1.29 KB
/
program.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pickle
from utilities import Tweet # necessary for loading tweet objects
from utilities import preprocess
from utilities import BayesSentimentClassifier
from utilities import accuracy_score
from sklearn.cross_validation import KFold
def main():
    """Cross-validate the Bayes sentiment classifier on the pickled tweets.

    Loads the tweet objects from ``tweets.p``, preprocesses them, and runs a
    shuffled 3-fold cross-validation. For every fold a fresh
    ``BayesSentimentClassifier`` is trained on the training split, asked to
    predict the test split, and the accuracy score of that split is printed.
    Afterwards one hand-written tweet is classified and its predicted
    sentiment and probabilities are printed.
    """
    # Load the tweet objects. The context manager closes the file handle
    # deterministically instead of leaving it open until garbage collection.
    # NOTE: unpickling can execute arbitrary code; only load a tweets.p file
    # that comes from a trusted source.
    with open('tweets.p', 'rb') as tweets_file:
        tweets = pickle.load(tweets_file)

    # preprocess each tweet's text (return value is not used, so this
    # presumably works in place -- confirm against utilities.preprocess)
    preprocess(tweets)

    # Perform KFold cross-validation with 3 folds to get a more reliable
    # accuracy estimate than a single train/test split would give.
    # NOTE(review): this is the legacy sklearn.cross_validation API (the
    # module was removed in scikit-learn 0.20). On newer releases the
    # equivalent is sklearn.model_selection.KFold(n_splits=3, shuffle=True)
    # iterated via .split(tweets) -- confirm before upgrading.
    kf = KFold(n=len(tweets), n_folds=3, shuffle=True)
    for train_indices, test_indices in kf:
        tweets_train = [tweets[i] for i in train_indices]
        tweets_test = [tweets[i] for i in test_indices]

        clf = BayesSentimentClassifier()
        clf.train(tweets_train)   # train the classifier, i.e. populate the sentiment dictionary
        clf.predict(tweets_test)  # predict sentiment of each tweet using Bayes Theorem

        # calculate accuracy
        print(accuracy_score(tweets_test))

    # Sanity check on a single hand-written tweet, using the classifier
    # trained in the last fold.
    # NOTE(review): assumed to run once after the loop rather than per fold
    # -- confirm against the intended behaviour.
    test = Tweet("", "", "I am skeptical about this result", "Tue Oct 18 18:05:50 +0000 2011", "")
    clf.predict([test])
    print(test.prediction.sentiment)
    print(test.prediction.probabilities)
# TODO: try word boundaries
# TODO: try tf-idf
# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()