-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathestimator.py
56 lines (45 loc) · 1.74 KB
/
estimator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import torch.nn as nn
class Estimator(nn.Module):
    """Convolutional estimator that returns one Q-value per action.

    The network is three ReLU-activated convolutions followed by a 512-unit
    fully connected layer and a linear output layer with ``num_actions``
    units. All layers live in ``self.model`` (an ``nn.Sequential``), so
    parameter names are ``model.<index>.weight`` / ``model.<index>.bias``.
    """

    # (out_channels, kernel_size, stride) of each convolution, in order.
    _CONV_SPECS = ((32, 8, 4), (64, 4, 2), (64, 3, 1))

    def __init__(self, num_actions, agent_history_length=4, input_size=84):
        """Build the layers and initialize their weights.

        Args:
            num_actions: Number of output units (one Q-value each).
            agent_history_length: Number of input channels of the first
                convolution.
            input_size: Spatial size of the input, either a single int for
                square inputs or a ``(height, width)`` pair. The default of
                84 yields the 64 x 7 x 7 = 3136 flattened features that the
                first fully connected layer previously hard-coded.

        Raises:
            ValueError: If ``input_size`` is too small to pass through the
                convolution stack.
        """
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        layers = []
        in_channels = agent_history_length
        for out_channels, kernel_size, stride in self._CONV_SPECS:
            # Output size of an unpadded convolution:
            # floor((n - kernel) / stride) + 1.
            height = (height - kernel_size) // stride + 1
            width = (width - kernel_size) // stride + 1
            if height < 1 or width < 1:
                raise ValueError(
                    f"input_size={input_size!r} is too small for the "
                    "convolution stack"
                )
            layers.append(
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                )
            )
            layers.append(nn.ReLU())
            in_channels = out_channels

        # With the default 84 x 84 input this is 64 * 7 * 7 = 3136.
        flat_features = in_channels * height * width
        layers.extend(
            [
                # batch x C x H x W  ->  batch x (C * H * W)
                nn.Flatten(1),
                nn.Linear(in_features=flat_features, out_features=512),
                nn.ReLU(),
                nn.Linear(in_features=512, out_features=num_actions),
            ]
        )

        self.model = nn.Sequential(*layers)
        self._initialize_weights()

    def forward(self, x):
        """Run ``x`` through the network and return the output of the last layer.

        ``x`` must be laid out as batch x channels x height x width, with the
        channel count and spatial size given at construction.
        """
        return self.model(x)

    def _initialize_weights(self):
        """Re-initialize every Conv2d/Linear weight with Kaiming-normal init.

        Biases are left at the values PyTorch assigned when the layers were
        constructed.
        """
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                # "relu" has the same gain (sqrt(2)) as the function's default
                # setting; it is spelled out to match the activations used.
                nn.init.kaiming_normal_(module.weight, nonlinearity="relu")
class transfer_model(nn.Module):
    """Reuse a base model's layers with a new output layer.

    ``features`` holds every layer of ``base_model.model`` except the last
    one; ``fc`` is a freshly created linear layer with ``num_actions``
    outputs that replaces the removed layer.
    """

    def __init__(self, base_model, num_actions):
        """Build the transfer network.

        Args:
            base_model: Object whose ``model`` attribute is an
                ``nn.Sequential``; its last layer is dropped.
            num_actions: Number of output units of the new ``fc`` layer.

        Note:
            The layers in ``features`` are the same objects as in
            ``base_model`` (they are not copied and not frozen here), so
            updating their parameters through this model also changes
            ``base_model``.
        """
        super().__init__()
        # Iterating the sliced Sequential yields its layers directly, so no
        # intermediate list is needed.
        self.features = nn.Sequential(*base_model.model[:-1])
        # The new head must accept what the removed layer accepted. Read that
        # width from the removed layer, falling back to 512 when the layer
        # does not expose ``in_features``.
        in_features = getattr(base_model.model[-1], "in_features", 512)
        self.fc = nn.Linear(in_features=in_features, out_features=num_actions)

    def forward(self, x):
        """Apply the shared feature layers, then the new output layer."""
        return self.fc(self.features(x))