-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbrain.py
More file actions
96 lines (78 loc) · 3.66 KB
/
brain.py
File metadata and controls
96 lines (78 loc) · 3.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy
from mlp_network import MLP
from mlp_layer import Layer
class brainModel:
    """Q-learning agent backed by a multi-layer perceptron.

    The MLP maps a 6-element input vector to 3 Q-values (one per action).
    Actions are sampled from a softmax (Boltzmann) policy whose inverse
    temperature is ``reliability_for_action``; weights are trained with a
    temporal-difference target built in ``update_weights``.
    """

    def __init__(self, reliability_for_action, discount, learning_rate, momentum, bias, hidden_layers, number_of_neurons):
        """Store hyper-parameters and build the underlying MLP.

        reliability_for_action: softmax inverse temperature for action selection.
        discount: Q-learning discount factor (gamma).
        learning_rate, momentum, bias: MLP training parameters.
        hidden_layers: number of hidden layers to insert.
        number_of_neurons: sequence of neuron counts, one per hidden layer.
        """
        self.reliability_for_action = reliability_for_action
        self.discount = discount
        self.learning_rate = learning_rate
        self.bias = bias
        self.momentum = momentum
        self.hidden_layers = hidden_layers
        self.number_of_neurons = number_of_neurons
        # Network topology: 6 inputs -> configured hidden layers -> 3 outputs.
        self.mlp = MLP(self.learning_rate, self.momentum)
        self.mlp.add_layer(Layer(6))
        for layer in range(self.hidden_layers):
            self.mlp.add_layer(Layer(int(self.number_of_neurons[layer])))
        self.mlp.add_layer(Layer(3))
        self.mlp.init_network(self.bias)

    def get_params(self):
        """Return the stored hyper-parameters as a tuple:
        (reliability_for_action, discount, learning_rate, momentum,
        bias, hidden_layers, number_of_neurons).
        """
        return (self.reliability_for_action, self.discount, self.learning_rate,
                self.momentum, self.bias, self.hidden_layers, self.number_of_neurons)

    def set_params(self, reliability_for_action, discount, learning_rate, momentum):
        """Update the changeable hyper-parameters and push them into the MLP.

        Fix: also record momentum on self so get_params() stays consistent
        with what the MLP is actually using.
        NOTE(review): unlike __init__, this writes ``discount`` directly onto
        the MLP instance as well — presumably the MLP reads it; confirm.
        """
        self.reliability_for_action = reliability_for_action
        self.discount = discount
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.mlp.learning_rate = self.learning_rate
        self.mlp.discount = self.discount
        self.mlp.momentum = momentum

    def get_reward(self, input_vals):
        """Compute the reward for a sensor input vector.

        input_vals[0] is the detection value for the target colour and
        input_vals[1] its positional offset; reward is the detection value
        scaled down by how far off-centre the colour appears.
        """
        right_color_no = 0  # 0 for red, 1 for green and 2 for yellow
        right_color_position_no = right_color_no + 1
        position_difference = abs(input_vals[right_color_position_no])
        right_color = 0.0 + input_vals[right_color_no]
        return right_color * (1 - position_difference)

    def update_weights(self, old_q, new_q, old_action, new_action, old_input_vals, new_input_vals, reward):
        """Build the temporal-difference target and back-propagate it.

        Re-runs a forward pass on the previous input, shifts the Q-value of
        the action actually taken by the TD prediction error, trains on that
        target vector, then runs a forward pass on the new input.
        ``old_q`` is accepted for interface compatibility but unused — the
        forward pass recomputes the old Q-values instead.
        """
        old_q_vector = self.mlp.get_result(old_input_vals)
        if reward == 1:
            # Goal reached: use the raw reward as the error signal.
            prediction_error = reward
        else:
            # Standard TD error: r + gamma * Q(s', a') - Q(s, a).
            prediction_error = reward + self.discount * new_q[new_action] - old_q_vector[old_action]
        # Target equals the network's own output except for the taken action,
        # which is nudged by the prediction error. (Local renamed from the
        # original, which confusingly shadowed the ``new_q`` parameter.)
        target_q = [old_q_vector[0], old_q_vector[1], old_q_vector[2]]
        target_q[old_action] += prediction_error
        self.mlp.back_propagate(target_q)
        # Forward pass on the new observation; the result is intentionally
        # discarded — presumably refreshes internal activations. TODO confirm.
        self.mlp.get_result(new_input_vals)

    def select_action(self, q_vector):
        """Sample an action index from a softmax over the Q-values.

        Higher Q-values are proportionally more likely; reliability_for_action
        acts as the inverse temperature. Generalized from the original
        hard-coded 3-way if/elif chain to any number of actions.
        """
        q = numpy.array(q_vector)
        probabilities = numpy.exp(q * self.reliability_for_action)
        probabilities = probabilities / numpy.sum(probabilities)
        sample = numpy.random.rand(1)[0]
        # Find which cumulative-probability bucket the sample falls into;
        # min() guards against float round-off pushing the sample past the
        # final boundary.
        cumulative = numpy.cumsum(probabilities)
        action = int(numpy.searchsorted(cumulative, sample, side='right'))
        return min(action, len(cumulative) - 1)