-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimulatorCore.m
More file actions
137 lines (109 loc) · 4.04 KB
/
simulatorCore.m
File metadata and controls
137 lines (109 loc) · 4.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
%%Script for core of execution in simulator D2DSS
% It is responsable for resquest and wait action decision to be taken by an agent
% Schemed as a wait-for scheduler, there is no time limit to turn exceed for waiting for an action be choosen
% so any techniques could be used
%%
% isOctave = exist('OCTAVE_VERSION', 'builtin') ~= 0; %used in Octave compatibility of simulator D2DSS
aOrder = [0,1,2,4,3,5,6,8,7,11,12,9,10,13]; %order of actions in structure implemented
rewarded = sparse(n, total); %sparse matrix used to store value of game by iteration (-1 opponent goal, +1 team goal, 0 no goal is not literally represented by this structure)
agents = cell(1,n); %store agents of team in memory
for p=pInit:n
Mb = M;
Mb.Ta = M.Ta;
Mb.Tb = M.Tb;
if Mb.Ta > 0
agentA = CreateAgentA(M);
end
if Mb.Tb > 0
agentB = CreateAgentB(Mb);
end
J = setGame(total);
S=state(M);
reward = rand-0.5;
% Start position of players (could be random or regular distribution)
if M.randAfterGoal
S.startPositionRand(M);
else
S.startPositionGoal(M,reward<0);
end
timegame = 0;
notFirst = false;
aNew = zeros(1,M.Ta+M.Tb);
sNewA= [];
sNewB= [];
reward =0;
%Main loop
horizon = M.horizon;
while J.timer>0
timegame = timegame+1;
% In each horizon sets start positions for all players
if horizon == 0
S.startPositionRand(M);
notFirst = false;
horizon = M.horizon;
if (shows)
showGame(S, M, J);
end
end
horizon = horizon-1;
% request an action for each player
aOld = aNew;
sOldA = sNewA;
sOldB = sNewB;
sNewA = S.factoringA();
sNewB = S.factoringB();
if M.Ta > 0
aNew(1:M.Ta) = ChooseActionA(M.Ta,agentA,sOldA,aOld(1:M.Ta),reward,sNewA,notFirst,timegame/total);
end
if M.Tb > 0
aNew(M.Ta+1:end) = aOrder(ChooseActionB(M.Tb,agentB,sOldB,aOld(M.Ta+1:end),reward,sNewB,notFirst,timegame/total)+1);
end
notFirst = true;
reward = S.move(aNew);
rewarded(p,timegame) = reward;
if abs(reward)
if (shows)
showGame(S, M, J);
end
if M.randAfterGoal
S.startPositionRand(M);
else
S.startPositionGoal(M,reward<0);
end
end
J.scoreA=J.scoreA+max(reward,0);
J.scoreB=J.scoreB+max(-reward,0);
J.timer=J.timer-1;
if (shows)
showGame(S, M, J);
if (stepbystep)
pause;
end
end
if mod(J.timer,nIterShow) == 0
[p J.timer/J.total]
full([sum(J.scoreA) sum(J.scoreB)])'
if plotPartial
if p > 1
plot(nIterShow*[1:(total-J.timer)/nIterShow],mean(reshape(rewarded(p,1:(total-J.timer)),nIterShow,(total-J.timer)/nIterShow),1),...
nIterShow*[1:(total-0)/nIterShow],mean(reshape(rewarded(1:(p-1),1:(total-0)),(p-1)*nIterShow,(total-0)/nIterShow),1));
else
plot(nIterShow*[1:(total-J.timer)/nIterShow],mean(reshape(rewarded(p,1:(total-J.timer)),nIterShow,(total-J.timer)/nIterShow),1));
end
drawnow
end
% if isOctave
% fflush(stdout);
% end
end
end
% This part made save of files of memory of agents and rewarded that is also score of game simulated
if saveAgent
agents{p} = agentA;
save(tempFile,'agents','M');
movefile(tempFile,agentStorage);
end
save(tempFile,'rewarded','p');
movefile(tempFile,scoreStorage);
J.timer = total;
end