-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoffice_temp.py
35 lines (25 loc) · 1.09 KB
/
office_temp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from gensim.models import Word2Vec
from sys import argv
from gensim.models import Doc2Vec
import time
from random import shuffle
from io import open
# NOTE(review): EPOCHS is not referenced by the code visible in this script;
# it is kept in case other tooling reads it.
EPOCHS = 51
# Directory that holds the previously trained models.
MODEL_DIR = "/lustre/amar/office_models"
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Algorithm names whose saved models are loaded with Doc2Vec; any other name
# is loaded with Word2Vec.
DOC2VEC_ALGORITHMS = ('pvdm', 'dbow')

# Probe words whose nearest neighbours are written out for manual inspection.
PROBE_WORDS = [
    'bugs', 'bug', 'firefox', 'email', 'emails', 'word', 'excel', 'calc',
    'office', 'microsoft', 'file', 'xml', 'crash', 'java', 'math', 'code',
    'problem', 'linux', 'pdf', 'document', 'impress', 'font', 'size',
]


def build_model_path(algorithm, dimensions, epoch, model_dir=MODEL_DIR):
    """Return the path of the saved model for one training configuration.

    The layout is
    ``<model_dir>/<algorithm>_model_dimensions_<dimensions>_epoch_<epoch>.word2vec``.
    NOTE(review): presumably this mirrors the name used when the models were
    saved -- confirm against the training script.

    Args:
        algorithm: Algorithm name as given on the command line.
        dimensions: Vector dimensionality (string or int; it is stringified).
        epoch: Training epoch of the snapshot (string or int; stringified).
        model_dir: Directory containing the saved models.
    """
    return "{}/{}_model_dimensions_{}_epoch_{}.word2vec".format(
        model_dir, algorithm, dimensions, epoch)


def load_model(algorithm, model_path):
    """Load the model at *model_path* with the class matching *algorithm*."""
    if algorithm in DOC2VEC_ALGORITHMS:
        return Doc2Vec.load(model_path)
    return Word2Vec.load(model_path)


def format_row(word, neighbours):
    """Return one TSV line: the probe word, a tab, and ``str(neighbours)``.

    The ``u`` prefix keeps the result a text string on Python 2 as well as
    Python 3, which is what a text-mode ``io.open`` file requires; this
    replaces the Python-2-only ``unicode(...)`` call.
    """
    return u"{}\t{}\n".format(word, str(neighbours))


def write_similar_words(model, probe_words, out_path, topn=10):
    """Write the *topn* most similar words for every probe word to *out_path*.

    Args:
        model: Loaded model exposing ``model.wv.most_similar``.
        probe_words: Iterable of words to query.
        out_path: Destination TSV file; it is overwritten.
        topn: Number of neighbours requested per word.
    """
    # The context manager closes the file even if a query raises. UTF-8 is
    # set explicitly so the output does not depend on the locale.
    with open(out_path, "w", encoding="utf-8") as out_file:
        for word in probe_words:
            try:
                neighbours = model.wv.most_similar(word, topn=topn)
            except KeyError:
                # A word missing from the vocabulary should not abort the
                # whole dump; report it and continue with the next word.
                logging.warning(
                    "'%s' is not in the model vocabulary; skipping", word)
                continue
            out_file.write(format_row(word, neighbours))


def main(args):
    """Run the dump for ``args = [prog, algorithm, dimensions, epoch]``.

    Raises:
        SystemExit: With a usage message when fewer than three arguments
            follow the program name.
    """
    if len(args) < 4:
        prog = args[0] if args else "office_temp.py"
        raise SystemExit(
            "usage: {} <algorithm> <dimensions> <epoch>".format(prog))
    algorithm, dimensions, epoch = args[1:4]
    model = load_model(
        algorithm, build_model_path(algorithm, dimensions, epoch))
    write_similar_words(
        model, PROBE_WORDS, 'OfficeQualitativeAnalysis_' + algorithm + '.tsv')


if __name__ == "__main__":
    main(argv)