-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsa.py
61 lines (47 loc) · 2.27 KB
/
sa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pprint
import re
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from kw import negkeywords, poskeywords
from tc import negstatements, posstatements
# Module-level VADER analyzer, constructed once at import time and used by
# perform_enhanced_sentiment_analysis() for every text it scores.
sia = SentimentIntensityAnalyzer()
# Simple western emoticons: eyes (":", ";" or "="), an optional "-" nose and a
# mouth (")", "(", "D" or "P").  IGNORECASE also admits lowercase "d" / "p".
emoticon_pattern = re.compile(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', re.VERBOSE | re.IGNORECASE)
# Matches the literal two-character sequence "@#".  preprocess_text() does not
# use it.  NOTE(review): possibly meant to match @mentions / #hashtags - confirm.
special_entities_pattern = re.compile(r'\@\#', re.VERBOSE | re.IGNORECASE)


def preprocess_text(text):
    """Strip emoticons from *text* and normalise its whitespace.

    Every match of ``emoticon_pattern`` is deleted, then runs of whitespace
    (including leading and trailing whitespace) are collapsed so that words
    are separated by exactly one space.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string; ``""`` when nothing but whitespace/emoticons
        remains.
    """
    without_emoticons = emoticon_pattern.sub('', text)
    # Join on a single space.  Joining on '' would fuse every word into one
    # token ("I love this" -> "Ilovethis"), leaving the tokenizer and the
    # sentiment lexicon with nothing recognisable to score.
    return ' '.join(without_emoticons.split())
def enhance_compound_score(text, base_compound_score):
    """Adjust a compound score by counting keyword hits in *text*.

    The text is lowercased and split with ``nltk.word_tokenize``.  Each token
    found in ``poskeywords`` counts +1; otherwise, a token found in
    ``negkeywords`` counts -1.  The net count is scaled by 0.1 and added to
    *base_compound_score*.  The result is not clamped to any range.

    Args:
        text: The text whose tokens are matched against the keyword lists.
        base_compound_score: The score to adjust.

    Returns:
        ``base_compound_score`` plus 0.1 times the net keyword count.
    """
    net_hits = 0
    for word in nltk.word_tokenize(text.lower()):
        # Positive membership is checked first, so a token present in both
        # collections counts as positive.
        if word in poskeywords:
            net_hits += 1
        elif word in negkeywords:
            net_hits -= 1
    return base_compound_score + net_hits * 0.1
def perform_enhanced_sentiment_analysis(text):
    """Score *text* with VADER and attach a keyword-adjusted compound score.

    The text is first passed through ``preprocess_text``; the cleaned text is
    scored by ``sia.polarity_scores`` and its ``'compound'`` value is adjusted
    by ``enhance_compound_score``.

    Args:
        text: The raw text to analyse.

    Returns:
        A new dict holding every entry returned by ``sia.polarity_scores``
        plus an ``'enhanced_compound'`` entry with the adjusted score.
    """
    cleaned = preprocess_text(text)
    scores = sia.polarity_scores(cleaned)
    # Copy so the analyzer's own result mapping is left untouched.
    result = dict(scores)
    result['enhanced_compound'] = enhance_compound_score(cleaned, scores['compound'])
    return result
def analyze_keyword(text):
    """Return the enhanced sentiment scores for a single keyword.

    This simply delegates to ``perform_enhanced_sentiment_analysis``.

    Args:
        text: The keyword (or any text) to score.

    Returns:
        The dict produced by ``perform_enhanced_sentiment_analysis(text)``.
    """
    scores = perform_enhanced_sentiment_analysis(text)
    return scores
def analyze_keywords(keywords):
    """Score every keyword in *keywords*.

    Args:
        keywords: An iterable of keywords; each is used both as the text to
            score and as the key of its result.

    Returns:
        A dict mapping each keyword to the result of
        ``perform_enhanced_sentiment_analysis`` for that keyword.  A keyword
        that occurs more than once keeps the entry from its last occurrence.
    """
    scores_by_keyword = {}
    for word in keywords:
        scores_by_keyword[word] = perform_enhanced_sentiment_analysis(word)
    return scores_by_keyword
def analyze_all_keywords():
    """Score both module-level keyword collections.

    Returns:
        A dict with two entries: ``"negative"`` holds the
        ``analyze_keywords`` result for ``negkeywords`` and ``"positive"``
        holds the one for ``poskeywords``.
    """
    # Dict displays evaluate left to right, so the negative keywords are
    # analysed before the positive ones.
    return {
        "negative": analyze_keywords(negkeywords),
        "positive": analyze_keywords(poskeywords),
    }
def analyze_corpus(statements):
    """Score every statement in *statements*, keyed by position.

    Args:
        statements: An iterable of texts to analyse.

    Returns:
        A dict mapping each statement's 0-based position in *statements* to
        the result of ``perform_enhanced_sentiment_analysis`` for it.
    """
    scores_by_index = {}
    for index, statement in enumerate(statements):
        scores_by_index[index] = perform_enhanced_sentiment_analysis(statement)
    return scores_by_index
def analyze_text_corpus():
    """Score both module-level statement collections.

    Returns:
        A dict with two entries: ``"negative"`` holds the ``analyze_corpus``
        result for ``negstatements`` and ``"positive"`` holds the one for
        ``posstatements``.
    """
    results = {}
    # Negative statements are analysed first, then positive ones.
    results["negative"] = analyze_corpus(negstatements)
    results["positive"] = analyze_corpus(posstatements)
    return results
# Script section: executed whenever this module is run or imported.  It scores
# the statement corpora and the keyword lists, then dumps both result sets to
# standard output.
pp = pprint.PrettyPrinter(indent=4)

# Run both analyses before printing anything.
sentiment_scores_statements = analyze_text_corpus()
sentiment_scores_keywords = analyze_all_keywords()

print("Sentiment scores for statements:")
print(pp.pformat(sentiment_scores_statements))
print("\nSentiment scores for keywords:")
print(pp.pformat(sentiment_scores_keywords))