Skip to content

Commit cf0319a

Browse files
Fix Chapter 8 test: use truncated episodic λ-return to match backward-view sum
1 parent 95cdf66 commit cf0319a

1 file changed

Lines changed: 13 additions & 11 deletions

File tree

ch8_td_lambda/tests/test_forward_backward_equiv.py

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,30 @@
11
import numpy as np
22

3-
def lambda_return(b, c, lam):
4-
G1 = b
5-
G2 = c
6-
G3 = 1.0
7-
return (1 - lam) * (G1 + lam * G2 + lam**2 * G3)
3+
def truncated_lambda_return(b, c, lam):
4+
"""
5+
Finite-episode λ-return for a 3-step episode with rewards (0,0,1), γ=1.
6+
n-step returns: G^(1) = b = V(s1), G^(2) = c = V(s2), G^(3) = 1 (the full return; all rewards before the last are 0).
7+
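G^λ = (1-λ)(G^(1) + λ G^(2)) + λ^2 G^(3)   (the episode ends after 3 steps, so all remaining weight λ^2 falls on the full return G^(3))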
8+
"""
9+
G1, G2, G3 = b, c, 1.0
10+
return (1 - lam) * (G1 + lam * G2) + (lam ** 2) * G3
811

912
def test_forward_backward_equivalence():
1013
a, b, c = 0.5, 0.3, 0.2
1114
lam = 0.5
1215

13-
# forward λ-return update for V(s0) = a
14-
Glam = lambda_return(b, c, lam)
16+
# Forward (truncated episodic λ-return)
17+
Glam = truncated_lambda_return(b, c, lam)
1518
forward_update = Glam - a
1619

17-
# backward view TD error updates
20+
# Backward: TD errors and eligibilities for s0 (γ=1)
1821
d0 = b - a
1922
d1 = c - b
2023
d2 = 1.0 - c
2124

22-
# eligibilities for s0 at each step
2325
e0 = 1.0
24-
e1 = lam # after one step
25-
e2 = lam**2 # after two steps (γ=1 here)
26+
e1 = lam
27+
e2 = lam ** 2
2628

2729
backward_update = d0 * e0 + d1 * e1 + d2 * e2
2830

0 commit comments

Comments
 (0)