-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_agents.py
22 lines (20 loc) · 983 Bytes
/
main_agents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from agents_simple_case import random_agent
from agents_simple_case import epsilon_greedy_agent
from agents_simple_case import thompson_agent
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # Script entry point: run each bandit agent on the same set of arms and
    # compare the values returned by their ``play`` functions in a bar chart.

    # Per-arm reward probabilities shared by every agent.
    reward_prob_list = [0.2, 0.6, 0.7, 0.4, 0.6, 0.1]
    rounds = 100
    steps = 1000
    separator = "\n----------------------------\n"

    # Agents are run in this fixed order (random, epsilon-greedy, Thompson);
    # a separator line is printed after each run.
    contenders = (
        ("Random", random_agent),
        ("Epsilon-Greedy", epsilon_greedy_agent),
        ("Thompson", thompson_agent),
    )
    scores = {}
    for label, agent in contenders:
        # NOTE(review): play() presumably returns the mean reward per round
        # (the chart title says so) -- confirm in agents_simple_case.
        scores[label] = agent.play(reward_prob_list, rounds, steps)
        print(separator)

    # Upper bound: always pulling the best arm, i.e. knowing the
    # probabilities a priori.
    scores["Onniscente"] = max(reward_prob_list) * steps

    # Bars are drawn in this display order, which differs from the run order.
    bar_order = ["Thompson", "Random", "Epsilon-Greedy", "Onniscente"]
    bar_heights = [scores[name] for name in bar_order]

    plt.bar(bar_order, bar_heights)
    plt.title(f"Ricompensa media per diversi agenti in {rounds} round da {steps} passi ciascuno")
    plt.show()