-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecommender_hybrid.py
97 lines (65 loc) · 3.48 KB
/
recommender_hybrid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import pandas as pd
from utils.utils import sort_desc
from sklearn.metrics.pairwise import cosine_similarity
def get_weighted_hybrid_recommendations(relevant_centroid, irrelevant_centroid, predictions_content_based_user_centroid, predictions_item_collaborative):
    """Blend content-based and collaborative scores with a weighted sum.

    The content-based weight grows by 0.25 for each centroid that is
    non-empty, so it is one of 0, 0.25 or 0.5; the collaborative weight is
    the complement to 1.

    The two prediction sequences are paired positionally with ``zip``: the
    identifier of each output tuple comes from the content-based entry, and
    the result is truncated to the shorter sequence.
    NOTE(review): this assumes both sequences list the same items in the
    same order -- confirm against the callers.

    NOTE(review): a later function in this file is defined with the same
    name and a two-argument signature; being defined later, it replaces this
    one at import time. Confirm which of the two callers expect.

    Args:
        relevant_centroid: sized container; only its length is inspected.
        irrelevant_centroid: sized container; only its length is inspected.
        predictions_content_based_user_centroid: sequence of
            ``(identifier, score)`` pairs.
        predictions_item_collaborative: sequence of ``(identifier, score)``
            pairs.

    Returns:
        A list of ``(identifier, blended_score)`` tuples.
    """
    step = 0.25
    total_weight = 1
    min_size = 0

    # Each non-empty centroid contributes one step to the content-based share.
    weight_content_based = 0
    for centroid in (relevant_centroid, irrelevant_centroid):
        weight_content_based += step * (len(centroid) > min_size)
    weight_collaborative_based = total_weight - weight_content_based

    blended = []
    paired = zip(predictions_content_based_user_centroid,
                 predictions_item_collaborative)
    for content_entry, collaborative_entry in paired:
        score = (content_entry[1] * weight_content_based +
                 collaborative_entry[1] * weight_collaborative_based)
        blended.append((content_entry[0], score))
    return blended
def get_mixing_hybrid_recommendations(predictions_item_collaborative, predictions_content_based_user_centroid, n_collaborative=5, n_total=10):
    """Build a mixed list: collaborative picks first, content-based fill-ins after.

    The first ``n_collaborative`` collaborative predictions are taken as-is,
    then content-based predictions are appended in their given order until
    the list holds ``n_total`` entries. An entry is skipped when its
    identifier (element 0) is already in the result, whether it came from the
    collaborative head or from an earlier content-based entry.

    Args:
        predictions_item_collaborative: sequence of entries whose element 0
            is the item identifier. Not modified.
        predictions_content_based_user_centroid: iterable of entries whose
            element 0 is the item identifier.
        n_collaborative: how many leading collaborative entries to keep
            (default 5).
        n_total: maximum length of the returned list (default 10). The
            collaborative head is never trimmed, so the result may be longer
            than ``n_total`` only if ``n_collaborative`` exceeds it.

    Returns:
        A new list of prediction entries.
    """
    mixed = list(predictions_item_collaborative[:n_collaborative])

    # Identifiers are tracked in a set so each membership test is O(1) and
    # repeated ids inside the content-based list are not added twice.
    seen_ids = {entry[0] for entry in mixed}

    for entry in predictions_content_based_user_centroid:
        if len(mixed) >= n_total:
            break
        if entry[0] in seen_ids:
            continue
        seen_ids.add(entry[0])
        mixed.append(entry)
    return mixed
def get_weighted_hybrid_recommendations(predictions, movie_set):
    """Average the scores each movie received across the combined predictions.

    For every ``(trailer_id, ratings)`` pair in ``movie_set`` the scores of
    all entries in ``predictions`` with that ``trailer_id`` are summed and
    divided by a fixed count of 2 (the number of prediction vectors that are
    expected to be concatenated in ``predictions``). A movie with no matching
    prediction gets a score of 0.

    NOTE(review): an earlier function in this file is defined with the same
    name and a four-argument signature; this later definition replaces it at
    import time. Confirm which of the two callers expect.

    Args:
        predictions: iterable of ``(trailer_id, score)`` pairs; the same id
            may appear more than once.
        movie_set: iterable of ``(trailer_id, ratings)`` pairs; only the id
            is used.

    Returns:
        Whatever ``sort_desc`` returns for the list of
        ``(trailer_id, averaged_score)`` tuples (presumably the list ordered
        by score, highest first -- see ``utils.utils.sort_desc``).
    """
    _num_vectors = 2

    # Aggregate once instead of rescanning every prediction for every movie.
    # Starting from 0 and adding in input order reproduces sum() exactly.
    score_totals = {}
    for tid, p_ui in predictions:
        score_totals[tid] = score_totals.get(tid, 0) + p_ui

    hybrid_predictions = [
        (trailer_id, score_totals.get(trailer_id, 0) / _num_vectors)
        for trailer_id, _ratings in movie_set
    ]
    return sort_desc(hybrid_predictions)
def get_switching_hybrid_recommendations(movies_to_predict, _all_ratings, _target_user_id, sim_matrix):
    """Predict the target user's rating for each candidate movie.

    For every candidate, a similarity is obtained between it and each movie
    the target user has rated:

    * first from the cosine similarity of the two movies' rating vectors,
      restricted to users who rated both (inner merge on ``userID``);
    * if that raises ``ValueError`` (e.g. no user rated both), from
      ``sim_matrix[rated_movie][trailer_id]`` instead;
    * if that lookup raises ``KeyError``, the rated movie is skipped.

    The 20 best neighbours, as ordered by ``sort_desc``, are combined into a
    similarity-weighted average of the target user's own ratings. When the
    sum of absolute similarities is zero (including when there are no
    neighbours) the prediction is 0.

    Args:
        movies_to_predict: iterable of ``(trailer_id, rating)`` pairs; only
            the id is used.
        _all_ratings: pandas DataFrame with at least the columns
            ``userID``, ``id`` and ``rating``.
        _target_user_id: value matched against the ``userID`` column.
        sim_matrix: fallback similarity lookup, indexed as
            ``sim_matrix[rated_movie][trailer_id]``.

    Returns:
        Whatever ``sort_desc`` returns for the list of
        ``(trailer_id, predicted_rating)`` tuples (presumably ordered by
        score, highest first -- see ``utils.utils.sort_desc``).
    """
    _limit_top_neighbours_to = 20
    target_user_ratings = _all_ratings[_all_ratings['userID'] == _target_user_id]

    # The target user's rating per movie, keeping the first row for each id
    # (the same row a filtered ``.iloc[0]`` lookup would return).
    rating_by_movie = {}
    for movie_id, user_rating in zip(target_user_ratings['id'],
                                     target_user_ratings['rating']):
        rating_by_movie.setdefault(movie_id, user_rating)

    predictions = []
    for trailer_id, _rating in movies_to_predict:
        # Invariant across the inner loop, so filter it only once.
        candidate_ratings = _all_ratings[_all_ratings['id'] == trailer_id]

        top_neighbours = []
        # find most similar movies
        for rated_movie in target_user_ratings['id']:
            intersect = pd.merge(_all_ratings[_all_ratings['id'] == rated_movie],
                                 candidate_ratings, on='userID')
            try:
                # Series.reshape is gone from current pandas; reshape the
                # underlying NumPy array instead.
                sim = cosine_similarity(
                    intersect['rating_x'].values.reshape(1, -1),
                    intersect['rating_y'].values.reshape(1, -1))[0][0]
            except ValueError:
                try:
                    sim = sim_matrix[rated_movie][trailer_id]
                except KeyError:
                    continue
            top_neighbours.append((rated_movie, sim))

        top_n = sort_desc(top_neighbours)[:_limit_top_neighbours_to]

        numerator, denominator = (0, 0)
        for neighbour, sim in top_n:
            numerator += sim * rating_by_movie[neighbour]
            denominator += abs(sim)

        # Explicit guard: dividing NumPy floats by zero yields NaN with a
        # warning rather than raising ZeroDivisionError.
        p_ui = numerator / denominator if denominator else 0
        predictions.append((trailer_id, p_ui))
    return sort_desc(predictions)