-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelpers.py
72 lines (50 loc) · 2.03 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def calculate_q_value(state, action, new_state, reward, alpha=0.1, gamma=0.9):
    """
    Compute the updated Q-value for taking `action` in `state`.

    The result blends the current estimate with the newly observed target:
        (1 - alpha) * Q(state, action) + alpha * (reward + gamma * max Q(new_state))

    state - the state whose Q-value will be updated
    action - the action taken in that state
    new_state - the state on which the robot will land
    reward - the living reward of the current state
    alpha - learning rate: weight given to the new target (default 0.1)
    gamma - discount factor applied to the best Q-value of new_state (default 0.9)

    Returns the new Q-value; nothing is written back to `state` here.
    """
    current_estimate = state.get_q_value(action)
    # Target: immediate reward plus the discounted best value reachable from new_state.
    target = reward + gamma * new_state.get_max_q_value()
    return (1 - alpha) * current_estimate + alpha * target
import random
def epsolon_select_action(state, e, limit):
    """
    Pick an action for `state` with an epsilon-greedy rule.

    state - the state to choose an action for
    e - exploration probability; a uniform draw from [0, 1] below `e` triggers exploration
    limit - only actions whose usage count is below this value may be explored

    Returns the state's max action by default. When exploring, returns a
    random action among those used fewer than `limit` times; if no action
    qualifies, the max action is kept.
    """
    draw = random.uniform(0, 1)
    action = state.get_max_action()
    if draw < e:
        under_limit = [
            candidate
            for candidate in state.get_actions().keys()
            if state.get_action_usage_count(candidate) < limit
        ]
        if under_limit:
            action = random.choice(under_limit)
    # The usage count of the chosen action is deliberately not incremented here.
    return action
import pickle
def save_object(object, filename):
    """
    Pickle `object` into the file at `filename`.

    object - the object to serialize (parameter name kept for existing callers,
             although it shadows the builtin)
    filename - path of the file to write; it is opened in binary write mode,
               so any existing content is replaced
    """
    # The context manager guarantees the file is flushed and closed,
    # even if pickling raises part-way through.
    with open(filename, 'wb') as out_file:
        pickle.dump(object, out_file, pickle.HIGHEST_PROTOCOL)
def load_object(filename):
    """
    Load and return the object pickled in the file at `filename`.

    filename - path of a file containing pickled data; opened in binary read mode

    Returns whatever object the pickle stream describes.
    """
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # come from a trusted source.
    # The context manager closes the file handle once loading finishes or fails.
    with open(filename, 'rb') as in_file:
        return pickle.load(in_file)
from world import World
from state import State
from agent import Agent
def get_restructured_world(old_world, new_agent, grid_size=40):
    """
    Build a fresh World populated with the data held by `old_world`.

    old_world - the world to copy the id, current state and per-state data from
    new_agent - id for the new world's Agent; when falsy, the id of
                old_world's agent is reused
    grid_size - side length of the square grid of states to copy; states are
                looked up by (str(i), str(j)) for i, j in range(grid_size)
                (default 40)

    Returns the new World. Every grid state is rebuilt as a new State with the
    old state's Q-values, actions, living reward and terminal flag.
    """
    if new_agent:
        agent = Agent(new_agent)
    else:
        agent = Agent(old_world.get_agent().get_id())
    world = World(old_world.get_id(), agent)

    # NOTE(review): the text this was recovered from had lost its indentation;
    # the current state is carried over for both branches above - confirm.
    world.update_current_state(old_world.get_current_state())

    # State coordinates are keyed by strings; build them once for both axes.
    coords = [str(index) for index in range(grid_size)]

    new_states = {}
    for i in coords:
        for j in coords:
            old_state = old_world.get_state((i, j))
            new_state = State(i, j)
            new_state.set_q_values(old_state.get_q_values())
            new_state.set_actions(old_state.get_actions())
            new_state.set_living_reward(old_state.get_living_reward())
            if old_state.is_terminal():
                new_state.mark_as_terminal()
            new_states[(i, j)] = new_state

    # The special terminal state is created fresh rather than copied from old_world.
    new_states[('terminal', 'terminal')] = State('terminal', 'terminal')
    world.set_states(new_states)
    return world