-
Notifications
You must be signed in to change notification settings - Fork 6
/
OUNoise.py
77 lines (65 loc) · 2.36 KB
/
OUNoise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
class OUNoise(object):
    """
    Ornstein-Uhlenbeck process noise generator.

    Implements the discrete-time update:
        x_(t+1) = x_t + theta * (mu - x_t) + sigma * e_t
    where e_t ~ N(0, I) and theta, mu, sigma are hyper-parameters.
    Commonly used as temporally correlated exploration noise for
    continuous-action RL agents (e.g. DDPG).
    """
    def __init__(self, action_space, mu=0.0, sigma=0.3, theta=0.15):
        """
        Constructor of the OU Noise.

        :param action_space: Box action space of a Gym env; must expose
            ``shape``, ``low`` and ``high``.
        :param mu: long-run mean the process reverts to
        :param sigma: scale of the Gaussian noise term
        :param theta: mean-reversion rate
        """
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.action_dim = action_space.shape[0]
        self.low = action_space.low
        self.high = action_space.high
        # reset() fully initializes self.state; no separate init needed.
        self.reset()

    def reset(self):
        """
        Reset the OU Noise state to the mean ``mu``.
        """
        self.state = np.ones(self.action_dim) * self.mu

    def evolve_state(self) -> np.ndarray:
        """
        Evolve the state of OU Noise applying the transformation and returning the vector.

        :return: Current State of OU Noise
        """
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim)
        self.state = x + dx
        return self.state

    def get_action(self, action_: np.ndarray, eps: float = 1.0) -> np.ndarray:
        """
        Return the action provided as parameter plus the calculated noise,
        clipped to the action-space bounds.

        :param action_: raw action-value vector to perturb
        :param eps: multiplier on the noise (optional external epsilon decay)
        :return: Action-Value vector with OU Noise applied, clipped to [low, high]
        """
        ou_state = self.evolve_state()
        return_action = action_ + eps * ou_state
        return np.clip(return_action, self.low, self.high)
if __name__ == '__main__':
    # Quick visual sanity check: plot raw sampled actions alongside the
    # same actions with OU noise applied.
    import gym
    import matplotlib.pyplot as plt

    env = gym.make("MountainCarContinuous-v0")
    noise = OUNoise(env.action_space)
    env.reset()

    raw_actions = []
    noisy_actions = []
    for _ in range(100):
        sampled = env.action_space.sample()
        raw_actions.append(sampled)
        noisy_actions.append(noise.get_action(sampled))

    plt.plot(raw_actions)
    plt.plot(noisy_actions)
    plt.show()