forked from andrewhart05/SEE-Which-is-Better
-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis.py
120 lines (97 loc) · 4.3 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
# Name of the image set under analysis. It is interpolated into the
# per-participant input paths under Results/ and into the filename of the
# distance-matrix CSV written at the end of the script.
image = "Waterbottle"
def calculate_expected_probability(rating_i, rating_j):
    """Return the expected score of competitor i against competitor j.

    This is the logistic Elo expectation with base 10 and a 400-point
    scale: equal ratings yield 0.5, and the result moves towards 1 as
    ``rating_i`` grows relative to ``rating_j``.
    """
    rating_gap = rating_j - rating_i
    odds_against_i = 10 ** (rating_gap / 400)
    return 1 / (1 + odds_against_i)
def update_ratings(ratings, results, K):
    """Run one pass of pairwise Elo updates and return the ratings.

    Every pair ``(i, j)`` with ``i < j`` is visited once, row by row. For
    each pair both expectations are computed from the ratings as they stand
    at that moment, then both ratings are adjusted immediately, so pairs
    processed later see the already-updated values.

    Args:
        ratings: Index-addressable, mutable ratings; modified in place.
        results: 2-D array indexed as ``results[i, j]`` giving the outcome
            credited to ``i`` against ``j``. Its first dimension sets the
            number of competitors.
        K: Step size applied to the gap between outcome and expectation.

    Returns:
        The same ``ratings`` object that was passed in.
    """
    competitor_count = results.shape[0]
    for first in range(competitor_count):
        for second in range(first + 1, competitor_count):
            # Both expectations come from the pre-update ratings of this pair.
            expected_first = calculate_expected_probability(
                ratings[first], ratings[second]
            )
            expected_second = 1 - expected_first

            # The matrix stores each direction of the match-up separately.
            score_first = results[first, second]
            score_second = results[second, first]

            ratings[first] += K * (score_first - expected_first)
            ratings[second] += K * (score_second - expected_second)
    return ratings
def read_and_prepare_matrix(file_path):
    """Load a labelled matrix from a CSV file.

    The first CSV column is taken as the row index.

    Returns:
        A ``(objects, data_matrix)`` pair: the row labels as a list and the
        remaining cells as a NumPy array.
    """
    frame = pd.read_csv(file_path, index_col=0)
    return list(frame.index), frame.values
# One choices matrix per participant for the selected image set.
files = [
    f"Results/{participant}_{image}_choices_matrix.csv"
    for participant in ("Dirk", "Maryam", "Nathan", "Doruk", "Andrew")
]

K = 32
initial_ratings = {}  # object name -> running rating, carried across files

for matrix_path in files:
    objects, results_matrix = read_and_prepare_matrix(matrix_path)

    # Objects not seen in an earlier file enter at the base rating of 0.0.
    for name in objects:
        initial_ratings.setdefault(name, 0.0)

    # Line the running ratings up with this file's row order, apply one
    # update pass, then store the outcome back under each object's name.
    current_ratings = np.array([initial_ratings[name] for name in objects])
    revised_ratings = update_ratings(current_ratings, results_matrix, K)
    initial_ratings.update(zip(objects, revised_ratings))
# Rank the objects from highest to lowest accumulated rating.
sorted_ratings = sorted(initial_ratings.items(), key=lambda item: item[1], reverse=True)
if not sorted_ratings:
    # Without this guard the zip(*...) unpacking below fails with an
    # unhelpful "not enough values to unpack" message.
    raise ValueError("No rated objects were found in the input matrices.")

# Bar chart of the raw ratings, in ranked order.
plt.figure(figsize=(12, 8))
objects, ratings = zip(*sorted_ratings)
plt.bar(objects, ratings)
plt.xticks(rotation=90)
plt.title("ELO Ratings of Opponents")
plt.ylabel("ELO Rating")
plt.tight_layout()  # Keep the rotated x-axis labels inside the figure
plt.show()

# Print the ELO scores
print("ELO Scores before normalization:")
for obj, rating in sorted_ratings:
    print(f"{obj}: {rating}")

# Min-max normalize the ratings onto the 0-to-1 range.
ratings_only = [rating for _, rating in sorted_ratings]
min_rating = min(ratings_only)
max_rating = max(ratings_only)
rating_span = max_rating - min_rating
if rating_span == 0:
    # All ratings are equal (e.g. only one object). Dividing by the zero
    # span would produce NaN or raise, so every object gets a score of 0.0.
    normalized_scores = [(obj, 0.0) for obj, _ in sorted_ratings]
else:
    normalized_scores = [
        (obj, (rating - min_rating) / rating_span) for obj, rating in sorted_ratings
    ]

# Print the normalized scores
print("\nNormalized ELO Scores (0 to 1 scale):")
for obj, normalized_score in normalized_scores:
    print(f"{obj}: {normalized_score:.3f}")
# normalized_scores is a list of (object, normalized_score) tuples in ranked
# order; split it into parallel label and score sequences so that matrix
# rows/columns, bar positions and CSV headers all share the same ordering.
objects_list = [obj for obj, _ in normalized_scores]
score_vector = np.array([score for _, score in normalized_scores], dtype=float)

# Entry [i, j] is score_i - score_j, computed for all pairs in one
# vectorized call instead of a Python-level double loop.
# NOTE(review): these are signed differences (the matrix is antisymmetric),
# not absolute distances — confirm that downstream consumers expect the sign.
distance_matrix = np.subtract.outer(score_vector, score_vector)

# Display the distance matrix
print("\nDistance Matrix:")
print(distance_matrix)

# Plotting the normalized ELO scores
plt.figure(figsize=(12, 8))
plt.bar(objects_list, score_vector, color='skyblue')
plt.xticks(rotation=90)
plt.title("Normalized ELO Scores")
plt.ylabel("Normalized Score")
plt.xlabel("Objects")
plt.tight_layout()  # Adjust layout to make room for the rotated x-axis labels
plt.show()

# Label rows and columns with the object names and write the matrix to a
# CSV named after the image set.
distance_df = pd.DataFrame(distance_matrix, index=objects_list, columns=objects_list)
csv_file_path = f'{image}_distance_matrix.csv'
distance_df.to_csv(csv_file_path)