-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathQTable.py
More file actions
129 lines (102 loc) · 4.33 KB
/
QTable.py
File metadata and controls
129 lines (102 loc) · 4.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import datetime
import numpy as np
import pandas as pd
import os
class QTable(object):
    """Tabular Q-value store that grows by one row per newly seen state.

    ``q`` is a 2-D NumPy array indexed as ``q[state][action]``.  States are
    registered through :meth:`addStateList`; each registration appends a
    zero row to ``q`` so the table always holds one spare row beyond the
    registered states.  :meth:`saveQ` drops that spare row when writing and
    :meth:`readQ` adds it back when loading.
    """

    # Class-level defaults are kept so ``QTable.stateCount`` and
    # ``QTable.stateList`` still resolve; every instance gets its own copies
    # in ``__init__`` so instances no longer mutate one shared dict.
    stateCount = 0
    stateList = {}

    # Output directories (trailing slash included; file names are appended
    # by plain string concatenation).
    _Q_DIR = 'result/QValue/'
    _STATE_DIR = 'result/Statelist/'

    def __init__(
        self,
        observation_space=1,
        action_space=32,
        alpha=0.3,
        gamma=0.9,
    ):
        """Create a zero-filled table.

        Args:
            observation_space: initial number of rows of ``q``.
            action_space: number of columns of ``q``.
            alpha: learning rate used by :meth:`sarsa_max_update`.
            gamma: discount factor used by :meth:`discounted_reward`.
        """
        self.alpha = alpha
        self.gamma = gamma
        self.observation_space = observation_space
        self.action_space = action_space
        self.q = np.zeros((self.observation_space, self.action_space))
        # Per-instance bookkeeping (previously shared through the class).
        self.stateCount = 0
        self.stateList = {}

    def addStateList(self, stateName, noActionClickable, noActionTextInput):
        """Look up a state, registering it if it has not been seen before.

        Returns:
            The existing key when a state with the same three fields is
            already registered; ``-1`` when the state was new and has just
            been stored under key ``stateCount`` (a row is appended to ``q``
            and ``stateCount`` is incremented).
        """
        wanted = (stateName, noActionClickable, noActionTextInput)
        for key, value in self.stateList.items():
            if (value[0], value[1], value[2]) == wanted:
                print("key in statelist =" + str(key))
                return key
        self.stateList[self.stateCount] = [
            stateName, noActionClickable, noActionTextInput,
        ]
        print(len(self.stateList))
        self.addStateToQ()
        self.stateCount += 1
        return -1

    def eq(self, state=None, action=None):
        """Return the whole table, one state's row, or a single Q-value.

        With no arguments the full array is returned; with only ``state``
        the row ``q[state]``; with both, the scalar ``q[state][action]``.
        """
        if state is None:
            return self.q
        if action is None:
            return self.q[state]
        return self.q[state][action]

    def update_q(self, state, action, value, i):
        """Write ``value`` into ``q[state][action]`` and every action in ``i``.

        Args:
            state: row index.
            action: column index that is always written.
            value: value to store.
            i: iterable of additional column indices that receive the same
                value; ``None`` is treated as "no additional actions".
        """
        self.q[state][action] = value
        if i is None:
            return
        for element in i:
            self.q[state][element] = value

    def max_q(self, state):
        """Return the largest Q-value in row ``state``."""
        return np.max(self.q[state])

    def old_value(self, state, action):
        """Return ``(1 - alpha) * q[state][action]``."""
        return (1 - self.alpha) * self.eq(state, action)

    def discounted_reward(self, state):
        """Return ``gamma * max(q[state])``."""
        return self.gamma * self.max_q(state)

    def sarsa_max_update(self, s, a, r, new_s, i):
        """Apply one Q-learning (SARSA-max) update for ``(s, a, r, new_s)``.

        Computes
        ``(1 - alpha) * Q(s, a) + alpha * (r + gamma * max Q(new_s, .))``
        and stores it through :meth:`update_q`, which also writes the same
        value to every action index listed in ``i``.
        """
        # ``old_value`` already carries the (1 - alpha) factor, so Q(s, a)
        # must not be subtracted again inside the alpha term; doing both
        # shrinks the update to (1 - 2*alpha) * Q + alpha * target.
        new_value = self.old_value(s, a) + self.alpha * (
            r + self.discounted_reward(new_s)
        )
        self.update_q(s, a, new_value, i)

    @staticmethod
    def _next_free_path(directory, name, suffix):
        """Return ``directory + name + suffix``, or the first unused
        ``directory + name + "(k)" + suffix`` for k = 1, 2, ...; creates
        ``directory`` if it does not exist."""
        if not os.path.isdir(directory):
            os.makedirs(directory)
        candidate = directory + str(name) + suffix
        counter = 0
        while os.path.exists(candidate):
            counter += 1
            candidate = directory + str(name) + "(" + str(counter) + ")" + suffix
        return candidate

    def saveQ(self, score, name):
        """Write ``q`` without its last (spare) row to a CSV file.

        The file is ``result/QValue/<name>QValue.csv``; if that exists, a
        ``(k)`` counter is inserted before ``QValue.csv`` so earlier files
        are never overwritten.

        Args:
            score: unused; kept so existing callers keep working.
            name: prefix of the output file name.
        """
        # The builtin ``float`` replaces the ``np.float`` alias, which
        # newer NumPy releases no longer provide.
        df = pd.DataFrame(self.q[:-1, :], dtype=float)
        df.to_csv(self._next_free_path(self._Q_DIR, name, "QValue.csv"))

    def saveStateList(self, name):
        """Write ``stateList`` to ``result/Statelist/<name>stateList.csv``.

        Uses the same ``(k)`` counter scheme as :meth:`saveQ` when the file
        already exists.
        """
        st = pd.DataFrame.from_dict(self.stateList, orient="index")
        st.to_csv(self._next_free_path(self._STATE_DIR, name, "stateList.csv"))

    def readQ(self, name):
        """Load ``q`` from ``result/QValue/<name>QValue.csv``.

        The header row and index column written by :meth:`saveQ` are
        stripped, and a zero row is appended to restore the spare row that
        :meth:`saveQ` dropped.

        Raises:
            ValueError: if the file's column count differs from
                ``action_space``.  Nothing on the instance is modified in
                that case.
        """
        readFile = pd.read_csv(
            self._Q_DIR + str(name) + 'QValue.csv', header=None
        ).values[1:, 1:].astype(float)
        if readFile.shape[1] != self.action_space:
            raise ValueError(
                "Q file has %d action columns but action_space is %d"
                % (readFile.shape[1], self.action_space)
            )
        # NOTE(review): this is the number of rows in the file, i.e. one
        # less than the number of rows of ``q`` after the spare row is
        # appended below -- confirm callers expect that.
        self.observation_space = np.size(readFile, 0)
        spare_row = np.zeros((1, self.action_space))
        self.q = np.vstack((readFile, spare_row))
        print ("q = ")
        print (self.q)
        print("readFile = ")
        print (readFile)

    def readStateList(self, name):
        """Load ``stateList`` from ``result/Statelist/<name>stateList.csv``.

        The saved index column is discarded and the rows are re-keyed by
        their position in the file.  ``stateCount`` is set to the number of
        loaded states so the next new state gets a fresh key instead of
        overwriting entry 0.
        """
        rd = pd.read_csv(
            self._STATE_DIR + str(name) + "stateList.csv"
        ).iloc[:, 1:]
        d = rd.to_dict("split")
        d = dict(zip(d["index"], d["data"]))
        self.stateList = d
        self.stateCount = len(d)
        print ("stateList = ")
        print(self.stateList)
        print ("d = ")
        print(d)

    def addStateToQ(self):
        """Append one zero row to ``q`` and increment ``observation_space``."""
        new_row = np.zeros((1, self.action_space))
        grown = np.vstack((self.q, new_row))
        print("length after add +1 to Q = " + str(len(grown)))
        self.q = grown
        self.observation_space = self.observation_space + 1