Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions Vavmoudakis Actor Critic/Kontoudis.m
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
function [currentState, time] = Kontoudis(initializations, currentState, xfstate)%, uvec)
function [currentState, time] = Kontoudis(initializations, currentState, finalStatesCell, reachAvoid)
global uvec
global wvec

%% Decrypt Init Vars
T = initializations{6};
N = initializations{8};
T = initializations{8};
N = initializations{10};

time = [];

%% Solve ODE
options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01);

for i=1:N
sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options);
sol_fnt = ode45(@(t,x) actorCritic(t,x,finalStatesCell,reachAvoid,initializations),[(i-1)*T,i*T],currentState(end,:),options);

time = [time;sol_fnt.x']; % save time
currentState = [currentState;sol_fnt.y']; % save state

uDelayInterpFcn = interp2PWC(uvec,0,i*T); % interpolate control input
wDelayInterpFcn = interp2PWC(wvec,0,i*T);
x1DelayInterpFcn = interp2PWC(currentState(:,1),0,i*T);
x2DelayInterpFcn = interp2PWC(currentState(:,2),0,i*T);
delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
delays = {uDelayInterpFcn; wDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
initializations(end) = {delays};
end
end
108 changes: 74 additions & 34 deletions Vavmoudakis Actor Critic/actorCritic.m
Original file line number Diff line number Diff line change
@@ -1,79 +1,113 @@
function [dotx] = actorCritic(t,x, xfstate, params)%, uvec)
function [dotx] = actorCritic(t,x, finalStatesCell, reachAvoid, params)%, uvec)
global uvec
global wvec

%% Decompose Parameters Cell
A = params(1);
B = params(2);
M = params(3);
R = params(4);
Pt = params(5);
T = params(6);
Tf = params(7);
alphaa = params(9);
alphac = params(10);
amplitude = params(end-4);
percent = params(end-3);
ufinal = params(end-2);
C = params(3);
M = params(4);
R = params(5);
F = params(6);
Pt = params(7);
T = params(8);
Tf = params(9);
alphaa = params(11);
alphac = params(12);
amplitude = params(end-5);
percent = params(end-4);
ufinal = params(end-3);
wfinal = params(end-2);
Wcfinal = params(end-1);
delays = params(end);
reach = reachAvoid(1);
avoid = reachAvoid(2);
xfstate = finalStatesCell(1);
xfstateDist = finalStatesCell(2);
% Note that every variable above is a struct, thus we get its value by
% using the {} operator or by using cell2mat
A = cell2mat(A);
B = cell2mat(B);
C = cell2mat(C);
M = cell2mat(M);
R = cell2mat(R);
F = cell2mat(F);
T = cell2mat(T);
Wcfinal = cell2mat(Wcfinal);
ufinal = cell2mat(ufinal);
wfinal = cell2mat(wfinal);
reach = cell2mat(reach);
avoid = cell2mat(avoid);
xfstate = cell2mat(xfstate);
xfstateDist = cell2mat(xfstateDist);

uDelayInterpFcn = delays{1, 1}(1);
x1DelayInterpFcn = delays{1, 1}(2);
x2DelayInterpFcn = delays{1, 1}(3);

Wc = [x(3);x(4);x(5);x(6);x(7);x(8)]; %critic
Wa = [x(9);x(10)]; %actor
p = x(11); %integral

UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(2)^2 ; xfstate(2)*ufinal; ufinal^2];
wDelayInterpFcn = delays{1, 1}(2);
x1DelayInterpFcn = delays{1, 1}(3);
x2DelayInterpFcn = delays{1, 1}(4);

Wc = [x(3);x(4);x(5);x(6);x(7);x(8);x(9);x(10);x(11);x(12)]; %critic
Wa = [x(13);x(14);x(15);x(16)]; %actor
p = x(17); %integral

UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(1)*wfinal; ...
xfstate(2)^2 ; xfstate(2)*ufinal; xfstate(2)*wfinal; ...
ufinal^2; ufinal*wfinal; ...
wfinal^2];
Pfinal = 0.5*(xfstate)'*Pt{1}*(xfstate); % equation 19

% Update control
ud = Wa'*(x(1:2)-xfstate); % equation 17

ud = Wa(1:2)'*(x(1:2)-xfstate); % equation 17
if ~avoid % reach
wd = Wa(3:4)'*(x(1:2)-xfstate);
else % avoid, reach-avoid
wd = Wa(3:4)'*(x(1:2)-xfstateDist);
end

% State and control delays
uddelay = ppval(uDelayInterpFcn{1},t-T);
wddelay = ppval(wDelayInterpFcn{1},t-T);
xdelay(1) = ppval(x1DelayInterpFcn{1},t-T);
xdelay(2) = ppval(x2DelayInterpFcn{1},t-T);

%Augmented state and Kronecker products
U = [x(1:2)-xfstate;ud-ufinal]; % Augmented state
UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(2)^2 ; U(2)*ud; ud^2]; % U kron U
U = [x(1:2)-xfstate;ud-ufinal;wd-wfinal]; % Augmented state
UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(1)*wd; ...
U(2)^2 ; U(2)*ud; U(2)*wd; ...
ud^2; ud*wd; ...
wd^2]; % U kron U
% This is not the exact UkronU (9x1 matrix), but we do it to lower the
% dimensionality. This way, Wc' would be 1x9, so the matrix multiplication
% can actually happen in ec.
UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(2)^2 ;...
xdelay(2)*uddelay; uddelay^2];

Qxx = x(3:5); % equations 14-16
Quu = x(8);
UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(1)*wddelay;...
xdelay(2)^2 ; xdelay(2)*uddelay; xdelay(2)*wddelay; ...
uddelay^2; uddelay*wddelay; ...
wddelay^2];

Qxx = x(3:6); % equations 14-16
Quu = x(11);
Qww = x(12);
Qxu = [x(6) x(7)]';
Qux = Qxu';
Qxw = [x(8) x(9)]';
Qwx = Qxw';

sigma = UkU - UkUdelay;

% integral reinforcement
dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+ud'*R*ud - uddelay'*R*uddelay);
dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+ud'*R*ud - uddelay'*R*uddelay - wd'*F*wd + wddelay'*F*wddelay);

% mine
QStar = Wc'*UkU;
uStar = -inv(Quu)*Qux*U(1:2);
wStar = inv(Qww)*Qwx*U(1:2);

% Approximation Errors
ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error
ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error
ea1 = Wa(1:2)'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error
ea2 = Wa(3:4)'*U(1:2)-.5*inv(Qww)*Qwx*U(1:2);
% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
Expand All @@ -85,17 +119,23 @@
dWc = -alphac{1}*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22

% actor update
dWa = -alphaa{1}*U(1:2)*ea'; % equation 23
dWa1 = -alphaa{1}*U(1:2)*ea1'; % equation 23
dWa2 = -alphaa{1}*U(1:2)*ea2'; % equation 23
dWa = [dWa1; dWa2];

% Persistent Excitation
if t<=(percent{1}/100)*Tf{1}
unew=(ud+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
wnew=(wd+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
else
unew=ud;
wnew=wd;
end

dx=A*[U(1);U(2)]+B*unew;
dx=A*[U(1);U(2)]+B*unew+C*wnew;
uvec = [uvec;unew];
dotx = [dx;dWc;dWa;dp;QStar;uStar]; % augmented state
wvec = [wvec;wnew];
dotx = [dx;dWc;dWa;dp;QStar;uStar;wStar]; % augmented state
end
103 changes: 103 additions & 0 deletions Vavmoudakis Actor Critic/distubBabyFnt.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
function [dotx] = distubBabyFnt(t,x)

global percent amplitude Tf Pt UkUfinal Pfinal xfstate xfstateDist ufinal wfinal Wcfinal
global uDelayInterpFcn wDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn
global T uvec wvec
global M R F
global alphaa alphaa2 alphac
global A B C Qxx Quu Qxu Qww Qxw sigma UkU UkUdelay

Wc = [x(3);x(4);x(5);x(6);x(7);x(8);x(9);x(10);x(11);x(12)]; %critic
Wa = [x(13);x(14);x(15);x(16)]; %actor
p = x(17); %integral

UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(1)*wfinal; ...
xfstate(2)^2 ; xfstate(2)*ufinal; xfstate(2)*wfinal; ...
ufinal^2; ufinal*wfinal; ...
wfinal^2];
Pfinal = 0.5*(xfstate)'*Pt*(xfstate); % equation 19

% Update control
ud = Wa(1:2)'*(x(1:2)-xfstate); % equation 17
wd = Wa(3:4)'*(x(1:2)-xfstate); % equation 17

% State and control delays
uddelay = ppval(uDelayInterpFcn,t-T);
wddelay = ppval(wDelayInterpFcn,t-T);
xdelay(1) = ppval(x1DelayInterpFcn,t-T);
xdelay(2) = ppval(x2DelayInterpFcn,t-T);

%Augmented state and Kronecker products
U = [x(1:2)-xfstate;ud-ufinal;wd-wfinal]; % Augmented state
Utone=U';
UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(1)*wd; ...
U(2)^2 ; U(2)*ud; U(2)*wd; ...
ud^2; ud*wd; ...
wd^2]; % U kron U
% This is not the exact UkronU (9x1 matrix), but we do it to lower the
% dimensionality. This way, Wc' would be 1x9, so the matrix multiplication
% can actually happen in ec.
UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(1)*wddelay;...
xdelay(2)^2 ; xdelay(2)*uddelay; xdelay(2)*wddelay; ...
uddelay^2; uddelay*wddelay; ...
wddelay^2];

Qxx = x(3:6); % equations 14-16
Quu = x(11);
Qww = x(12);
Qxu = [x(6) x(7)]';
Qux = Qxu';
Qxw = [x(8) x(9)]';
Qwx = Qxw';

sigma = UkU - UkUdelay;

% integral reinforcement
% dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
% +ud'*R*ud - uddelay'*R*uddelay);

dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+ud'*R*ud - uddelay'*R*uddelay - wd'*F*wd + wddelay'*F*wddelay);

% mine
QStar = Wc'*UkU;
uStar = -inv(Quu)*Qux*U(1:2);
wStar = inv(Qww)*Qwx*U(1:2);

% Approximation Errors
ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error
ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
ea1 = Wa(1:2)'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error
ea2 = Wa(3:4)'*U(1:2)-.5*inv(Qww)*Qwx*U(1:2);
% ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2)-.5*inv(Qww)*Qwx*U(1:2);
% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
% ud is slowly getting to u*, aka
% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2)

% critic update
dWc = -alphac*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22

% actor update
dWa1 = -alphaa*U(1:2)*ea1'; % equation 23
dWa2 = -alphaa2*U(1:2)*ea2'; % equation 23
dWa = [dWa1; dWa2];
% dWa = max([dWa1 dWa2]);
% dWa = -alphaa*(dWa1+dWa2).*U(1:2);

% Persistent Excitation
if t<=(percent/100)*Tf
unew=(ud+amplitude*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
wnew=(wd+amplitude*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
else
unew=ud;
wnew=wd;
end

dx=A*[U(1);U(2)]+B*unew+C*wnew;
uvec = [uvec;unew];
wvec = [wvec;wnew];
dotx = [dx;dWc;dWa;dp;QStar;uStar;wStar]; % augmented state
Loading