-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathword_embeddings.py
29 lines (24 loc) · 1.19 KB
/
word_embeddings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# -*- coding: utf-8 -*-
from termcolor import colored
from gensim.models import KeyedVectors
from spellchecker import SpellChecker
class WordEmbeddings:
    """Report, for each label, the most similar word according to a word2vec model.

    The model is loaded once at construction time and kept on ``self.model``.
    """

    def __init__(self, model_path="./models/twitter/word2vec_twitter_model.bin"):
        """Load a binary word2vec model from disk.

        Args:
            model_path: Path to a word2vec file in binary format. Defaults to
                the location that used to be hard-coded, so existing callers
                that pass no argument behave as before.
        """
        self.model_path = model_path
        self.model = KeyedVectors.load_word2vec_format(self.model_path, unicode_errors='ignore', binary=True)
        print('Word2Vec is loaded for semantic similarity task ...')

    def checkSemanticSimilarity(self, labels, words):
        """Print the best-matching spell-corrected word for every label.

        Words of two characters or fewer are ignored. Each remaining word is
        spell-corrected and kept only if the corrected form is known to the
        model. For every label, the kept word with the highest similarity
        (strictly greater than 0) is printed as ``label=>word: score``; when
        nothing matches, the word is empty and the score is 0.

        Args:
            labels: Iterable of label strings to compare against.
            words: Iterable of candidate word strings.

        Returns:
            None. Results are written to stdout.
        """
        print(colored('############ WORD SIMILARITY CHECK ################', 'blue'))

        # Building a SpellChecker loads its dictionary, and a word's correction
        # does not depend on the label, so do both once per call instead of
        # once per (label, word) pair.
        spell = SpellChecker()
        candidates = []
        for word in words:
            if len(word) > 2:
                newWord = spell.correction(word)
                # correction() may yield None when no candidate exists.
                # `key in model` is used instead of `model.vocab`, which is
                # not available in gensim 4.
                if newWord is not None and newWord in self.model:
                    candidates.append(newWord)

        for label in labels:
            tmpSimilarity = 0
            tmpWord = ''
            # The label's membership is independent of the word being tested.
            if label in self.model:
                for newWord in candidates:
                    similarity = self.model.similarity(label, newWord)
                    if similarity > tmpSimilarity:
                        tmpWord = newWord
                        tmpSimilarity = similarity
            # Single-argument call form prints identically on Python 2 and 3.
            print(label + '=>' + tmpWord + ': ' + colored(str(tmpSimilarity), 'green'))