-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathnormalized_actions.py
More file actions
60 lines (46 loc) · 1.98 KB
/
normalized_actions.py
File metadata and controls
60 lines (46 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gym
import torch
class NormalizedActions(gym.ActionWrapper):
    """Map actions between the agent's [-1, 1] range and the env's bounds.

    The bounds are read from ``self.action_space.low`` and
    ``self.action_space.high`` (presumably a Box-like space with array
    bounds -- confirm against the wrapped environment).

    ``action`` / ``_action`` and ``reverse_action`` / ``_reverse_action`` are
    provided in both spellings so the wrapper works whichever name the
    installed gym version dispatches to.
    """

    def action(self, action):
        """Rescale ``action`` from [-1, 1] to [low, high].

        The input is not modified; a new value is returned.
        """
        low = self.action_space.low
        high = self.action_space.high
        unit = (action + 1) / 2  # [-1, 1] => [0, 1]
        return unit * (high - low) + low

    def _action(self, action):
        """Legacy alias of :meth:`action`."""
        return self.action(action)

    def reverse_action(self, action):
        """Rescale ``action`` from [low, high] back to [-1, 1].

        Uses out-of-place arithmetic so the caller's array is left untouched
        (the previous in-place ``-=`` / ``/=`` overwrote the argument).
        Dimensions where ``high == low`` divide by zero and are not handled.
        """
        low = self.action_space.low
        high = self.action_space.high
        unit = (action - low) / (high - low)  # [low, high] => [0, 1]
        return unit * 2 - 1

    def _reverse_action(self, action):
        """Legacy alias of :meth:`reverse_action`."""
        return self.reverse_action(action)
def normalize(x, stats):
    """Standardise ``x`` with the running statistics in ``stats``.

    Args:
        x: Value to normalise.
        stats: Object exposing ``mean`` and ``var`` (``var`` must support
            ``+`` with a float and a ``.sqrt()`` method), or ``None``.

    Returns:
        ``x`` unchanged when ``stats`` is ``None``; otherwise
        ``(x - mean) / sqrt(var + 1e-8)``.
    """
    if stats is not None:
        # The small constant keeps the division finite when var is zero.
        std = (stats.var + 1e-8).sqrt()
        return (x - stats.mean) / std
    return x
class RunningMeanStd(object):
    """Running per-feature mean and variance of a stream of batches.

    Batches are merged with the parallel variance algorithm:
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
    """

    def __init__(self, epsilon=1e-4, shape=(), device=torch.device('cpu')):
        """Start from mean 0, variance 1 and a pseudo-count of ``epsilon``.

        Args:
            epsilon: Initial value of ``count``.
            shape: Shape of the ``mean`` and ``var`` tensors.
            device: Torch device the statistics tensors are moved to.
        """
        self.mean = torch.zeros(shape).to(device)
        self.var = torch.ones(shape).to(device)
        self.count = epsilon

    def update(self, x):
        """Fold a batch ``x`` into the running statistics.

        Moments are taken over dimension 0, and ``x.shape[0]`` is used as
        the number of samples in the batch.
        """
        batch_mean = torch.mean(x, dim=0)
        # The merge step multiplies variance by count to recover the sum of
        # squared deviations, so it needs the population (biased) variance.
        # torch.var defaults to the unbiased estimator, which is inconsistent
        # with that formula and is NaN for a single-row batch.
        batch_var = torch.var(x, dim=0, unbiased=False)
        batch_count = x.shape[0]
        self.update_from_moments(batch_mean, batch_var, batch_count)

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        """Merge precomputed batch moments into ``mean``, ``var``, ``count``.

        Delegates to the module-level ``update_mean_var_count_from_moments``.
        """
        self.mean, self.var, self.count = update_mean_var_count_from_moments(
            self.mean, self.var, self.count,
            batch_mean, batch_var, batch_count)
def update_mean_var_count_from_moments(mean, var, count, batch_mean, batch_var, batch_count):
    """Merge two sets of (mean, variance, count) moments into one.

    Implements the parallel variance algorithm: the combined sum of squared
    deviations is the sum of both parts plus a cross term proportional to the
    squared difference of the two means.

    Args:
        mean, var, count: Moments of the data seen so far.
        batch_mean, batch_var, batch_count: Moments of the new batch.
            Variances are treated as population variances (``var * count``
            is used as the sum of squared deviations).

    Returns:
        Tuple ``(new_mean, new_var, new_count)`` for the combined data.
    """
    delta = batch_mean - mean
    tot_count = count + batch_count

    new_mean = mean + delta * batch_count / tot_count

    # Sums of squared deviations of each part about its own mean.
    m_a = var * count
    m_b = batch_var * batch_count
    # The cross term uses delta squared; a square root here is wrong and
    # yields NaN whenever the batch mean is below the running mean.
    M2 = m_a + m_b + delta ** 2 * count * batch_count / tot_count

    new_var = M2 / tot_count
    new_count = tot_count
    return new_mean, new_var, new_count