-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtagger.py
105 lines (88 loc) · 3.72 KB
/
tagger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import pymysql.cursors
import traceback
import operator
import ConfigParser
# Database settings are read from the [database] section of db.cfg, looked up
# relative to the current working directory.
config = ConfigParser.ConfigParser()
config.read('db.cfg')

# Single module-level connection used by every function in this file.
# DictCursor makes each fetched row a dict keyed by column name.
connection = pymysql.connect(
    host=config.get('database', 'host'),
    user=config.get('database', 'username'),
    password=config.get('database', 'password'),
    db=config.get('database', 'db'),
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
def getTags():
    """Return a dict mapping each tag name found on a post to a topic id.

    Reads every distinct (Tags, TopicId) pair produced by joining Posts with
    PostTopicMap. A Tags value is treated as a string of the form
    ``<a><b><c>``: the outer angle brackets are stripped and the rest is
    split on ``><``. When the same tag appears in several rows, the topic id
    of the last row fetched wins.
    """
    sql = "SELECT DISTINCT(P.Tags) as Tags, PT.TopicId as TopicId FROM Posts P join PostTopicMap PT on PT.PostId = P.Id"
    with connection.cursor() as cursor:
        cursor.execute(sql)
        rows = cursor.fetchall()

    topic_by_tag = {}
    for row in rows:
        raw = row[u'Tags']
        # Posts without tags carry NULL or an empty string; nothing to map.
        if raw is None or raw == "":
            continue
        topic_id = row[u'TopicId']
        for name in raw[1:-1].split("><"):
            topic_by_tag[name] = topic_id
    return topic_by_tag
def putTags(tags):
    """Insert every ``tag name -> topic id`` pair of *tags* into the Tags table.

    All rows are sent as one batch and committed together, so the table
    receives either every row or none of them. If the insert fails, the
    pending work is rolled back before the error is re-raised; this keeps
    half-written rows from lingering in an open transaction on the shared
    module-level connection.

    Args:
        tags: mapping of tag name to topic id (the shape returned by
            getTags()).

    Raises:
        Whatever the database driver raises for a failed insert or commit.
    """
    # Snapshot the pairs up front, using only iteration and item access.
    rows = [(name, tags[name]) for name in list(tags)]
    if not rows:
        return

    query = "INSERT INTO Tags(TagName, TopicId) values (%s, %s)"
    try:
        with connection.cursor() as cursor:
            cursor.executemany(query, rows)
        connection.commit()
    except Exception:
        # Undo the partial batch, then let the caller see the real failure.
        connection.rollback()
        raise
def getTagFrequency(userid):
    """Build a topic-grouped tag-frequency tree for one user.

    Steps:
      1. Load the tag name -> topic id lookup from the Tags table.
      2. Fetch the user's highest-weighted topics from UserInterests (at
         most five; fewer is fine).
      3. Count how often each tag occurs across the distinct Tags strings of
         posts mapped to those topics. A Tags value is treated as a string
         of the form ``<a><b><c>``.
      4. Take the 150 most frequent tags, drop the hard-coded ``"java"``
         entry and any tag that has no row in the Tags table, and group the
         remainder by topic id.

    Args:
        userid: value matched against UserInterests.UserId.

    Returns:
        ``{"name": "words", "children": [...]}`` where each child is
        ``{"name": <1-based group number>, "children": [{"name": tag,
        "size": count}, ...]}``. ``children`` is empty when the user has no
        interests or no tagged posts. Group numbering follows dict iteration
        order and carries no meaning beyond distinguishing groups.
    """
    with connection.cursor() as cursor:
        cursor.execute("select TagName, TopicId from Tags")
        topic_of_tag = {}
        for row in cursor.fetchall():
            topic_of_tag[row[u'TagName']] = row[u'TopicId']

        cursor.execute(
            "select TopicId, Weight from UserInterests where UserId = %s "
            "order by Weight desc Limit 5",
            (userid,))
        topic_ids = [row[u'TopicId'] for row in cursor.fetchall()]

        posts = []
        if topic_ids:
            # One placeholder per topic actually returned, so users with
            # fewer than five interests no longer fail. Only "%s" markers
            # are spliced into the SQL text; the ids stay bound parameters.
            placeholders = ", ".join(["%s"] * len(topic_ids))
            cursor.execute(
                "SELECT Distinct(Tags) as Tags from Posts where Id in "
                "(SELECT PT.PostId from Topics as T join PostTopicMap as PT "
                "on PT.TopicId = T.Id where T.Id in (" + placeholders + "))",
                tuple(topic_ids))
            posts = cursor.fetchall()

    # Tally tag occurrences across the fetched posts.
    counts = {}
    for post in posts:
        raw = post[u'Tags']
        if not raw:
            continue
        for name in raw[1:-1].split("><"):
            counts[name] = counts.get(name, 0) + 1

    # Most frequent first (ascending stable sort, then reversed).
    ranked = sorted(counts.items(), key=operator.itemgetter(1))
    ranked.reverse()

    by_topic = {}
    for name, size in ranked[:150]:
        # "java" is excluded after the top-150 cut, so it still uses a slot.
        if name == "java":
            continue
        # A tag without a Tags row has no topic to be grouped under.
        if name not in topic_of_tag:
            continue
        by_topic.setdefault(topic_of_tag[name], []).append(
            {"name": name, "size": size})

    children = [{"name": number, "children": entries}
                for number, entries in enumerate(by_topic.values(), 1)]
    return {"name": "words", "children": children}
if __name__ == "__main__":
    # Disabled steps that read tags from Posts and insert them into the Tags
    # table; kept for reference:
    # tags = getTags()
    # putTags(tags)
    print(getTagFrequency("821742"))