Costopoulos · Costopoulos · Jul 21, 2022 · Jul 21, 2022 · Jul 21, 2022 · Jul 21, 2022
diff --git a/Vavmoudakis Actor Critic/Kontoudis.m b/Vavmoudakis Actor Critic/Kontoudis.m
@@ -1,25 +1,27 @@
-function [currentState, time] = Kontoudis(initializations, currentState, xfstate)%, uvec)
+function [currentState, time] = Kontoudis(initializations, currentState, finalStatesCell, reachAvoid)
 global uvec
+global wvec
 
 %% Decrypt Init Vars
-T = initializations{6};
-N = initializations{8};
+T = initializations{8};
+N = initializations{10};
 
 time = [];
 
 %% Solve ODE
 options = odeset('RelTol',1e-5,'AbsTol',1e-5,'MaxStep',.01);
 
 for i=1:N
-    sol_fnt = ode45(@(t,x) actorCritic(t,x,xfstate,initializations),[(i-1)*T,i*T],currentState(end,:),options);
+    sol_fnt = ode45(@(t,x) actorCritic(t,x,finalStatesCell,reachAvoid,initializations),[(i-1)*T,i*T],currentState(end,:),options);
 
     time = [time;sol_fnt.x'];    % save time
     currentState = [currentState;sol_fnt.y'];    % save state
 
     uDelayInterpFcn = interp2PWC(uvec,0,i*T);    % interpolate control input
+    wDelayInterpFcn = interp2PWC(wvec,0,i*T);
     x1DelayInterpFcn = interp2PWC(currentState(:,1),0,i*T);
     x2DelayInterpFcn = interp2PWC(currentState(:,2),0,i*T);
-    delays = {uDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
+    delays = {uDelayInterpFcn; wDelayInterpFcn; x1DelayInterpFcn; x2DelayInterpFcn};
     initializations(end) = {delays};
 end
 end
diff --git a/Vavmoudakis Actor Critic/actorCritic.m b/Vavmoudakis Actor Critic/actorCritic.m
@@ -1,79 +1,113 @@
-function [dotx] = actorCritic(t,x, xfstate, params)%, uvec)
+function [dotx] = actorCritic(t,x, finalStatesCell, reachAvoid, params)%, uvec)
 global uvec
+global wvec
 
 %% Decompose Parameters Cell
 A = params(1);
 B = params(2);
-M = params(3);
-R = params(4);
-Pt = params(5);
-T = params(6);
-Tf = params(7);
-alphaa = params(9);
-alphac = params(10);
-amplitude = params(end-4);
-percent = params(end-3);
-ufinal = params(end-2);
+C = params(3);
+M = params(4);
+R = params(5);
+F = params(6);
+Pt = params(7);
+T = params(8);
+Tf = params(9);
+alphaa = params(11);
+alphac = params(12);
+amplitude = params(end-5);
+percent = params(end-4);
+ufinal = params(end-3);
+wfinal = params(end-2);
 Wcfinal = params(end-1);
 delays = params(end);
+reach = reachAvoid(1);
+avoid = reachAvoid(2);
+xfstate = finalStatesCell(1);
+xfstateDist = finalStatesCell(2);
 % Note that every variable above is a struct, thus we get its value by
 % using the {} operator or by using cell2mat
 A = cell2mat(A);
 B = cell2mat(B);
+C = cell2mat(C);
 M = cell2mat(M);
 R = cell2mat(R);
+F = cell2mat(F);
 T = cell2mat(T);
 Wcfinal = cell2mat(Wcfinal);
 ufinal = cell2mat(ufinal);
+wfinal = cell2mat(wfinal);
+reach = cell2mat(reach);
+avoid = cell2mat(avoid);
+xfstate = cell2mat(xfstate);
+xfstateDist = cell2mat(xfstateDist);
 
 uDelayInterpFcn = delays{1, 1}(1);
-x1DelayInterpFcn = delays{1, 1}(2);
-x2DelayInterpFcn = delays{1, 1}(3);
-
-Wc = [x(3);x(4);x(5);x(6);x(7);x(8)]; %critic
-Wa = [x(9);x(10)]; %actor
-p = x(11); %integral 
-
-UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(2)^2 ; xfstate(2)*ufinal; ufinal^2];
+wDelayInterpFcn = delays{1, 1}(2);
+x1DelayInterpFcn = delays{1, 1}(3);
+x2DelayInterpFcn = delays{1, 1}(4);
+
+Wc = [x(3);x(4);x(5);x(6);x(7);x(8);x(9);x(10);x(11);x(12)]; %critic
+Wa = [x(13);x(14);x(15);x(16)]; %actor
+p = x(17); %integral 
+
+UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(1)*wfinal; ...
+            xfstate(2)^2 ; xfstate(2)*ufinal; xfstate(2)*wfinal; ...
+            ufinal^2; ufinal*wfinal; ...
+            wfinal^2];
 Pfinal = 0.5*(xfstate)'*Pt{1}*(xfstate); % equation 19
 
 % Update control
-ud = Wa'*(x(1:2)-xfstate); % equation 17
-
+ud = Wa(1:2)'*(x(1:2)-xfstate); % equation 17
+if ~avoid % reach
+    wd = Wa(3:4)'*(x(1:2)-xfstate);  
+else % avoid, reach-avoid
+    wd = Wa(3:4)'*(x(1:2)-xfstateDist);
+end
 
 % State and control delays
 uddelay = ppval(uDelayInterpFcn{1},t-T);
+wddelay = ppval(wDelayInterpFcn{1},t-T);
 xdelay(1) = ppval(x1DelayInterpFcn{1},t-T);
 xdelay(2) = ppval(x2DelayInterpFcn{1},t-T);
 
 %Augmented state and Kronecker products
-U = [x(1:2)-xfstate;ud-ufinal]; % Augmented state
-UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(2)^2 ; U(2)*ud; ud^2]; % U kron U
+U = [x(1:2)-xfstate;ud-ufinal;wd-wfinal]; % Augmented state
+UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(1)*wd; ...
+       U(2)^2 ; U(2)*ud; U(2)*wd; ...
+       ud^2; ud*wd; ...
+       wd^2]; % U kron U
 % This is not the exact UkronU (9x1 matrix), but we do it to lower the
 % dimensionality. This way, Wc' would be 1x9, so the matrix multiplication
 % can actually happen in ec.
-UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(2)^2 ;...
-    xdelay(2)*uddelay; uddelay^2];
-
-Qxx = x(3:5); % equations 14-16
-Quu = x(8);
+UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(1)*wddelay;...
+            xdelay(2)^2 ; xdelay(2)*uddelay; xdelay(2)*wddelay; ...
+            uddelay^2; uddelay*wddelay; ...
+            wddelay^2];
+
+Qxx = x(3:6); % equations 14-16
+Quu = x(11);
+Qww = x(12);
 Qxu = [x(6) x(7)]';
 Qux = Qxu';
+Qxw = [x(8) x(9)]';
+Qwx = Qxw';
 
 sigma = UkU - UkUdelay;
 
 % integral reinforcement
-dp =  0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
-    +ud'*R*ud - uddelay'*R*uddelay); 
+dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+    +ud'*R*ud - uddelay'*R*uddelay - wd'*F*wd + wddelay'*F*wddelay);
 
 % mine
 QStar = Wc'*UkU;
 uStar = -inv(Quu)*Qux*U(1:2);
+wStar = inv(Qww)*Qwx*U(1:2);
 
 % Approximation Errors 
 ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error
 ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
-ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error 
+ea1 = Wa(1:2)'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error 
+ea2 = Wa(3:4)'*U(1:2)-.5*inv(Qww)*Qwx*U(1:2);
 % or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
 % BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
 % to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
@@ -85,17 +119,23 @@
 dWc = -alphac{1}*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22
 
 % actor update
-dWa = -alphaa{1}*U(1:2)*ea'; % equation 23
+dWa1 = -alphaa{1}*U(1:2)*ea1'; % equation 23
+dWa2 = -alphaa{1}*U(1:2)*ea2'; % equation 23
+dWa = [dWa1; dWa2];
 
 % Persistent Excitation
 if t<=(percent{1}/100)*Tf{1}
     unew=(ud+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
         sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
+    wnew=(wd+amplitude{1}*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
+        sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
 else
     unew=ud;
+    wnew=wd;
 end
 
-dx=A*[U(1);U(2)]+B*unew;
+dx=A*[U(1);U(2)]+B*unew+C*wnew;
 uvec = [uvec;unew];
-dotx = [dx;dWc;dWa;dp;QStar;uStar]; % augmented state
+wvec = [wvec;wnew];
+dotx = [dx;dWc;dWa;dp;QStar;uStar;wStar]; % augmented state
 end
diff --git a/Vavmoudakis Actor Critic/distubBabyFnt.m b/Vavmoudakis Actor Critic/distubBabyFnt.m
@@ -0,0 +1,103 @@
+function [dotx] = distubBabyFnt(t,x)
+
+global percent amplitude Tf Pt UkUfinal Pfinal xfstate xfstateDist ufinal wfinal Wcfinal
+global uDelayInterpFcn wDelayInterpFcn x1DelayInterpFcn x2DelayInterpFcn
+global T uvec wvec
+global M R F
+global alphaa alphaa2 alphac
+global A B C Qxx Quu Qxu Qww Qxw sigma UkU UkUdelay
+
+Wc = [x(3);x(4);x(5);x(6);x(7);x(8);x(9);x(10);x(11);x(12)]; %critic
+Wa = [x(13);x(14);x(15);x(16)]; %actor
+p = x(17); %integral 
+
+UkUfinal = [xfstate(1)^2 ; xfstate(1)*xfstate(2); xfstate(1)*ufinal; xfstate(1)*wfinal; ...
+            xfstate(2)^2 ; xfstate(2)*ufinal; xfstate(2)*wfinal; ...
+            ufinal^2; ufinal*wfinal; ...
+            wfinal^2];
+Pfinal = 0.5*(xfstate)'*Pt*(xfstate); % equation 19
+
+% Update control
+ud = Wa(1:2)'*(x(1:2)-xfstate); % equation 17
+wd = Wa(3:4)'*(x(1:2)-xfstate); % equation 17
+
+% State and control delays
+uddelay = ppval(uDelayInterpFcn,t-T);
+wddelay = ppval(wDelayInterpFcn,t-T);
+xdelay(1) = ppval(x1DelayInterpFcn,t-T);
+xdelay(2) = ppval(x2DelayInterpFcn,t-T);
+
+%Augmented state and Kronecker products
+U = [x(1:2)-xfstate;ud-ufinal;wd-wfinal]; % Augmented state
+Utone=U';
+UkU = [U(1)^2 ; U(1)*U(2); U(1)*ud; U(1)*wd; ...
+       U(2)^2 ; U(2)*ud; U(2)*wd; ...
+       ud^2; ud*wd; ...
+       wd^2]; % U kron U
+% This is not the exact UkronU (9x1 matrix), but we do it to lower the
+% dimensionality. This way, Wc' would be 1x9, so the matrix multiplication
+% can actually happen in ec.
+UkUdelay = [xdelay(1)^2 ; xdelay(1)*xdelay(2); xdelay(1)*uddelay; xdelay(1)*wddelay;...
+            xdelay(2)^2 ; xdelay(2)*uddelay; xdelay(2)*wddelay; ...
+            uddelay^2; uddelay*wddelay; ...
+            wddelay^2];
+
+Qxx = x(3:6); % equations 14-16
+Quu = x(11);
+Qww = x(12);
+Qxu = [x(6) x(7)]';
+Qux = Qxu';
+Qxw = [x(8) x(9)]';
+Qwx = Qxw';
+
+sigma = UkU - UkUdelay;
+
+% integral reinforcement
+% dp =  0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+%     +ud'*R*ud - uddelay'*R*uddelay);
+
+dp = 0.5*((x(1:2)-xfstate)'*M*(x(1:2)-xfstate) -xdelay(1:2)*M*xdelay(1:2)' ...
+    +ud'*R*ud - uddelay'*R*uddelay - wd'*F*wd + wddelay'*F*wddelay); 
+
+% mine
+QStar = Wc'*UkU;
+uStar = -inv(Quu)*Qux*U(1:2);
+wStar = inv(Qww)*Qwx*U(1:2);
+
+% Approximation Errors 
+ec = p + Wc'*UkU - Wc'*UkUdelay; % Critic approximator error
+ecfinal = Pfinal - Wcfinal'*UkUfinal; % Critic approximator error final state
+ea1 = Wa(1:2)'*U(1:2)+.5*inv(Quu)*Qux*U(1:2); % Actor approximator error 
+ea2 = Wa(3:4)'*U(1:2)-.5*inv(Qww)*Qwx*U(1:2);
+% ea = Wa'*U(1:2)+.5*inv(Quu)*Qux*U(1:2)-.5*inv(Qww)*Qwx*U(1:2);
+% or x(1:2) - xfstate instead of U(1:2) % or ud instead of Wa'*U(1:2)
+% BASICALLY, looking at eq30, ud = Wa'*U(1:2) = Wa'(x(1:2)-xfstate) wants
+% to reach u* = inv(Quu)*Qux*U(1:2), that's why ea is defined like this.
+% ud is slowly getting to u*, aka
+% Wa'*U(1:2) -> inv(Quu)*Qux*U(1:2)
+
+% critic update
+dWc = -alphac*((sigma./(sigma'*sigma+1).^2)*ec'+(UkUfinal./((UkUfinal'*UkUfinal+1).^2)*ecfinal')); % eq 22
+
+% actor update
+dWa1 = -alphaa*U(1:2)*ea1'; % equation 23
+dWa2 = -alphaa2*U(1:2)*ea2'; % equation 23
+dWa = [dWa1; dWa2];
+% dWa = max([dWa1 dWa2]);
+% dWa = -alphaa*(dWa1+dWa2).*U(1:2);
+
+% Persistent Excitation
+if t<=(percent/100)*Tf
+    unew=(ud+amplitude*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
+        sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
+    wnew=(wd+amplitude*exp(-0.009*t)*2*(sin(t)^2*cos(t)+sin(2*t)^2*cos(0.1*t)+...
+        sin(-1.2*t)^2*cos(0.5*t)+sin(t)^5+sin(1.12*t)^2+cos(2.4*t)*sin(2.4*t)^3));
+else
+    unew=ud;
+    wnew=wd;
+end
+
+dx=A*[U(1);U(2)]+B*unew+C*wnew;
+uvec = [uvec;unew];
+wvec = [wvec;wnew];
+dotx = [dx;dWc;dWa;dp;QStar;uStar;wStar]; % augmented state