episode.py
import torch
import numpy as np
from typing import Tuple, Any, Dict

TensorType = Any


class Episode(object):
    """Episode class that stores the attributes of an
    environment episode in a queue-like format."""

    def __init__(self,
                 obs: TensorType,
                 action_space: int = 1,
                 action_repeat: int = 2) -> None:
        """Initializes the lists of episode attributes."""
        self.action_space = action_space
        self.action_repeat = action_repeat
        self.reset(obs)

    def append(self,
               episode_attrs: Tuple[TensorType, TensorType, float, bool]) -> None:
        """Appends an (obs, action, reward, done) transition to the lists."""
        obs, action, reward, done = episode_attrs
        # Convert the HWC image observation to a contiguous CHW float tensor.
        obs = torch.FloatTensor(np.ascontiguousarray(obs.transpose((2, 0, 1))))
        self.t += 1
        self.obs.append(obs)
        self.action.append(action)
        self.reward.append(reward)
        self.done.append(done)

    def reset(self,
              obs: TensorType) -> None:
        """Resets the lists of episode attributes."""
        # Convert the HWC image observation to a contiguous CHW float tensor.
        obs = torch.FloatTensor(np.ascontiguousarray(obs.transpose((2, 0, 1))))
        self.t = 1
        self.obs = [obs]
        # torch.zeros already returns a float tensor; no extra wrapping needed.
        self.action = [torch.zeros(1, self.action_space)]
        self.reward = [0]
        self.done = [False]

    def todict(self) -> Dict:
        """Stacks the episode attributes into batched tensors."""
        return {'count': self.t,
                'obs': torch.stack(self.obs),
                'action': torch.cat(self.action),
                'reward': torch.FloatTensor(self.reward),
                'done': torch.BoolTensor(self.done)}
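

# Usage sketch (illustrative, not part of the original file): builds an
# episode from a dummy 64x64 RGB observation, appends one transition, and
# stacks the buffers into batched tensors via todict(). The observation
# shape is an assumption; the class only requires HWC arrays that it can
# transpose to CHW.
if __name__ == "__main__":
    obs = np.zeros((64, 64, 3), dtype=np.uint8)   # dummy HWC observation
    episode = Episode(obs, action_space=1)
    action = torch.zeros(1, 1)                    # shape (1, action_space)
    episode.append((obs, action, 0.5, False))
    data = episode.todict()
    print(data['obs'].shape)      # torch.Size([2, 3, 64, 64])
    print(data['action'].shape)   # torch.Size([2, 1])
    print(data['reward'])         # tensor([0.0000, 0.5000])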