-
Notifications
You must be signed in to change notification settings - Fork 7
/
Model.py
104 lines (86 loc) · 3.39 KB
/
Model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import parl
from parl import layers
# actor
class Actor(parl.Model):
    """Policy network: a five-layer fully connected trunk plus a linear shortcut.

    The trunk is fc1 (ReLU) -> fc2 -> fc3 -> fc4 -> fc5, where fc2..fc5 have
    no activation. A separate linear layer (``res_fc``) maps the observation
    directly to ``act_dim``; the two results are summed and passed through
    tanh.
    """

    def __init__(self, act_dim, laser_num=7):
        """Create the layers.

        Args:
            act_dim: output width of the final trunk layer and of the
                shortcut layer.
            laser_num: stored on the instance as ``self.laser_num``; nothing
                in this class reads it.
        """
        self.laser_num = laser_num

        # Baseline alternative (disabled): three fc layers of size 512 with
        # ReLU, followed by fc(size=act_dim, act="tanh").

        # Layers of the active architecture.
        self.fc1 = layers.fc(size=32, act="relu")
        self.fc2 = layers.fc(size=64, act=None)
        self.fc3 = layers.fc(size=128, act=None)
        self.fc4 = layers.fc(size=32, act=None)
        self.fc5 = layers.fc(size=act_dim, act=None)
        # Linear shortcut from the raw observation to the action space.
        self.res_fc = layers.fc(size=act_dim, act=None)

    def policy(self, obs):
        """Compute the action output for ``obs``.

        Args:
            obs: observation input fed to both the trunk and the shortcut.

        Returns:
            tanh(trunk(obs) + res_fc(obs)).
        """
        hidden = self.fc1(obs)
        # Fix: fc2 previously consumed ``obs`` again, which silently discarded
        # fc1's output (the only non-linear layer in the trunk).
        # NOTE(review): fc2's input is now fc1's 32-wide output instead of the
        # raw observation, so weights saved with the old wiring presumably
        # will not load into this graph -- confirm before reusing checkpoints.
        hidden = self.fc2(hidden)
        hidden = self.fc3(hidden)
        hidden = self.fc4(hidden)
        hidden = self.fc5(hidden)
        shortcut = self.res_fc(obs)
        return layers.tanh(hidden + shortcut)
# critic
class Critic(parl.Model):
    """Q-value network with three parallel branches merged by a shared head.

    Branches (each ending in a 128-wide layer):
      * observation only:  obs_fc1 -> obs_fc2 -> obs_fc3
      * action only:       act_fc1 -> act_fc2 -> act_fc3
      * [obs, act] joined: total_fc1 -> total_fc2 -> total_fc3

    The branch outputs are concatenated and passed through
    re_fc1 -> re_fc2 -> re_fc3 -> re_fc4 (one output unit).

    NOTE(review): re_fc4 uses a tanh activation, so the returned value is
    confined to (-1, 1) -- confirm this matches the reward scale in use.
    """

    def __init__(self):
        """Create the layers of all three branches and of the merge head."""
        # Baseline alternative (disabled): four fc layers of size 512 with
        # ReLU, followed by fc(size=1, act=None), applied to concat(obs, act).

        # Observation branch.
        self.obs_fc1 = layers.fc(size=32, act="relu")
        self.obs_fc2 = layers.fc(size=64, act=None)
        self.obs_fc3 = layers.fc(size=128, act=None)

        # Action branch.
        self.act_fc1 = layers.fc(size=32, act="relu")
        self.act_fc2 = layers.fc(size=64, act="tanh")
        self.act_fc3 = layers.fc(size=128, act="tanh")

        # Joint (observation + action) branch.
        self.total_fc1 = layers.fc(size=16, act="relu")
        self.total_fc2 = layers.fc(size=64, act=None)
        self.total_fc3 = layers.fc(size=128, act=None)

        # Merge head; its first layer is as wide as the three 128-wide branch
        # outputs concatenated together.
        self.re_fc1 = layers.fc(size=128 * 3, act="tanh")
        self.re_fc2 = layers.fc(size=256, act="tanh")
        self.re_fc3 = layers.fc(size=128, act="relu")
        self.re_fc4 = layers.fc(size=1, act="tanh")

    def value(self, obs, act):
        """Compute the critic output for an (observation, action) pair.

        Args:
            obs: observation input.
            act: action input.
            Both are concatenated along axis 1, so they are presumably
            (batch, dim) tensors -- TODO confirm against the caller.

        Returns:
            The output of ``re_fc4`` with axis 1 squeezed away.
        """
        obs_act = layers.concat([obs, act], axis=1)

        # Observation-only features.
        obs_feat = self.obs_fc3(self.obs_fc2(self.obs_fc1(obs)))

        # Action-only features.
        act_feat = self.act_fc3(self.act_fc2(self.act_fc1(act)))

        # Features of the joined observation/action input.
        joint_feat = self.total_fc3(self.total_fc2(self.total_fc1(obs_act)))

        merged = layers.concat([obs_feat, act_feat, joint_feat], axis=1)
        q = self.re_fc1(merged)
        q = self.re_fc2(q)
        q = self.re_fc3(q)
        q = self.re_fc4(q)
        return layers.squeeze(q, axes=[1])
# Integrate the actor net and the critic net into a single model.
class Model(parl.Model):
    """Container that holds one ``Actor`` and one ``Critic`` and delegates to them."""

    def __init__(self, act_dim):
        """Build the two sub-models.

        Args:
            act_dim: forwarded to ``Actor`` as its action dimension.
        """
        self.actor_model = Actor(act_dim)
        self.critic_model = Critic()

    def policy(self, obs):
        """Return the actor's ``policy`` output for ``obs``."""
        actor = self.actor_model
        return actor.policy(obs)

    def value(self, obs, act):
        """Return the critic's ``value`` output for ``obs`` and ``act``."""
        critic = self.critic_model
        return critic.value(obs, act)

    def get_actor_params(self):
        """Return the result of ``parameters()`` on the actor sub-model."""
        actor = self.actor_model
        return actor.parameters()