# implicit_ratings_calculator.py
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "prs_project.settings")
import django
from django.db.models import Count
django.setup()
import datetime
from datetime import date, timedelta
from collections import defaultdict
from collector.models import Log
from analytics.models import Rating
# Event weights used by the implicit-rating calculators in this module.
# w1 always weights 'buy' events.  w2 and w3 weight the two detail-view
# events, but the calculators pair them differently: the time-decay
# calculator maps w2 -> 'moredetails' and w3 -> 'details', while the
# aggregated calculator multiplies w2 by the 'details' count and w3 by the
# 'moredetails' count.
# NOTE(review): confirm which pairing is the intended one.
w1 = 100
w2 = 50
w3 = 15
def calculate_decay(age_in_days):
    """Return the time-decay factor for an event of the given age.

    The factor is the reciprocal of the age, so older events weigh less.
    Ages below one day (an event logged today, or a timestamp in the
    future) are clamped to one day; this keeps the factor within (0, 1]
    and guarantees the division can never hit zero.

    Args:
        age_in_days: Age of the event in days.

    Returns:
        ``1 / age_in_days`` for ages of at least one day, otherwise 1.
    """
    return 1 / max(age_in_days, 1)
def query_log_for_users():
    """Fetch the distinct user ids that appear in the collector log.

    Roughly the SQL:

        SELECT DISTINCT user_id
        FROM collector_log

    Each result row exposes the id under the 'user_id' key.
    """
    user_id_rows = Log.objects.values('user_id')
    return user_id_rows.distinct()
def query_log_data_for_user(userid):
    """Fetch every collector log entry recorded for one user.

    Roughly the SQL:

        SELECT *
        FROM collector_log
        WHERE user_id = <userid>
    """
    all_entries = Log.objects
    return all_entries.filter(user_id=userid)
def query_aggregated_log_data_for_user(userid):
    """Count one user's log entries per (content, event) combination.

    The result rows carry the keys 'user_id', 'content_id', 'event' and
    'count', where 'count' is the number of 'created' values in the group.
    """
    grouped = Log.objects.filter(user_id=userid).values(
        'user_id', 'content_id', 'event')
    return grouped.annotate(count=Count('created'))
def calculate_implicit_ratings_w_timedecay(user_id):
    """Compute time-decayed implicit ratings for one user.

    Every weighted log event of the user adds
    ``weight(event) * calculate_decay(age_in_days)`` to the score of the
    content it refers to, so recent interactions count more than old ones.

    Args:
        user_id: Id of the user whose log entries are scored.

    Returns:
        dict mapping content id -> accumulated decayed score.  Content that
        only received unweighted events is absent from the result.
    """
    weights = {'buy': w1, 'moredetails': w2, 'details': w3}
    today = date.today()
    # defaultdict lets the first event for a content item start from 0.0;
    # guarding on "key already present" would never add anything to an
    # initially empty dict.
    ratings = defaultdict(float)

    for entry in query_log_data_for_user(user_id):
        weight = weights.get(entry.event)
        if weight is None:
            # Events other than buy/moredetails/details carry no rating
            # signal; skip them instead of failing on the lookup.
            continue

        created = entry.created
        if isinstance(created, datetime.datetime):
            # A date cannot be subtracted from a datetime; compare dates.
            created = created.date()

        # Age in whole days, floored at one day so same-day (or
        # future-dated) events never lead to a division by zero.
        age_in_days = max((today - created).days, 1)

        # content_id is the field the aggregated log query in this module
        # selects.  NOTE(review): confirm against the Log model.
        ratings[entry.content_id] += weight * calculate_decay(age_in_days)

    return dict(ratings)
def calculate_implicit_ratings_for_user(user_id):
    """Compute implicit ratings on a 0-10 scale for one user.

    The per-content event counts are combined into a weighted score, and
    all scores are then scaled so that the user's highest score becomes 10.

    Args:
        user_id: Id of the user whose aggregated log data is scored.

    Returns:
        dict mapping content id (as str) -> rating.  When no content has a
        positive score (for example the user only produced unweighted
        events) the unscaled all-zero scores are returned, because there is
        no maximum to normalise against.
    """
    # content id -> event name -> count; events never seen count as 0.
    event_counts = defaultdict(lambda: defaultdict(int))
    for row in query_aggregated_log_data_for_user(user_id):
        event_counts[str(row['content_id'])][row['event']] = row['count']

    # NOTE(review): w2/w3 are paired with 'details'/'moredetails' here, while
    # calculate_implicit_ratings_w_timedecay pairs them the other way round
    # (w2 -> 'moredetails', w3 -> 'details').  Confirm which is intended.
    ratings = {
        content_id: (w1 * counts['buy']
                     + w2 * counts['details']
                     + w3 * counts['moredetails'])
        for content_id, counts in event_counts.items()
    }

    max_rating = max(ratings.values(), default=0)
    if max_rating <= 0:
        # Nothing to scale by; dividing would raise ZeroDivisionError.
        return ratings

    return {
        content_id: 10 * rating / max_rating
        for content_id, rating in ratings.items()
    }
def save_ratings(ratings, user_id, type):
    """Store a user's positive ratings as Rating rows, echoing progress.

    One Rating is saved per entry of ``ratings`` whose value is greater
    than zero; each saved pair is printed, and a '.' is printed after every
    100 entries processed.

    Args:
        ratings: Mapping of content id -> rating value.
        user_id: Id of the user the ratings belong to.
        type: Label stored in the ``type`` field of each Rating.
    """
    print("saving ratings for {}".format(user_id))

    processed = 0
    for content_id, rating in ratings.items():
        if rating > 0:
            record = Rating(
                user_id=user_id,
                movie_id=str(content_id),
                rating=rating,
                rating_timestamp=datetime.datetime.now(),
                type=type,
            )
            record.save()
            print('{} {}'.format(user_id, str(content_id)))

        processed += 1
        if processed == 100:
            # Progress marker; restart the count for the next hundred.
            print('.', end="")
            processed = 0
def calculate_ratings_with_timedecay():
    """Compute and store time-decayed implicit ratings for every logged user.

    The results are saved with the type label 'implicit_w'.
    """
    user_rows = query_log_for_users()
    for user_row in user_rows:
        current_user = user_row['user_id']
        decayed = calculate_implicit_ratings_w_timedecay(current_user)
        save_ratings(decayed, current_user, 'implicit_w')
def calculate_ratings():
    """Compute and store implicit ratings for every user found in the log.

    The results are saved with the type label 'implicit'.
    """
    for user_row in query_log_for_users():
        current_user = user_row['user_id']
        save_ratings(
            calculate_implicit_ratings_for_user(current_user),
            current_user,
            'implicit',
        )
if __name__ == '__main__':
    # Script entry point: drop the previously stored 'implicit' ratings,
    # then rebuild them from the collector log.
    print("Calculating implicit ratings...")
    previous_ratings = Rating.objects.filter(type='implicit')
    previous_ratings.delete()
    calculate_ratings()