aima-python/test_pomdp.py at master · n0whereRuoxi/aima-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from mdp import *

def test_pomdp_value_iteration():
    """Build a 20-state, 4-action POMDP and check pomdp_value_iteration's output.

    The transition model holds one 20x20 matrix per action (up, right, down,
    left, in that order). Every row contains exactly one 1, so each matrix is
    described below by the column index of that 1 for each row.

    NOTE(review): the emission model is four empty lists, and ``rewards`` has
    3 rows of 2 entries while 4 actions and 20 states are declared. This
    fixture looks unfinished; confirm the intended emission and reward data
    against what POMDP expects.
    """
    states = tuple(str(index) for index in range(20))
    actions = ('0', '1', '2', '3')
    gamma = 0.95

    def one_hot_rows(targets):
        """Return one row per entry of targets, with a single 1 at that column."""
        return [[1 if column == target else 0 for column in range(len(states))]
                for target in targets]

    # 0-based column of the 1 in each row; row i corresponds to the row that
    # was labelled "#i+1" in the expanded matrices. An entry equal to its own
    # position means the row maps the state onto itself.
    up = [0, 1, 2, 3, 0, 3, 4, 5, 6, 9, 10, 7, 8, 11, 12, 13, 14, 17, 18, 15]
    right = [1, 2, 3, 3, 4, 5, 6, 7, 9, 10, 11, 11, 12, 13, 14, 15, 17, 18, 19, 19]
    down = [4, 1, 2, 5, 6, 7, 8, 11, 12, 9, 10, 13, 14, 15, 16, 19, 16, 17, 18, 19]
    left = [0, 0, 1, 2, 4, 5, 6, 7, 8, 8, 9, 10, 12, 13, 14, 15, 16, 16, 17, 18]
    t_prob = [one_hot_rows(targets) for targets in (up, right, down, left)]

    # One (currently empty) emission table per action: up, right, down, left.
    e_prob = [[] for _ in actions]
    rewards = [[5, -10], [-20, 5], [-1, -1]]

    pomdp = POMDP(actions, t_prob, e_prob, rewards, states, gamma)
    utility = pomdp_value_iteration(pomdp, epsilon=5)

    # The total is recomputed for every entry of utility, so only the value
    # from the last entry reaches the assertion below.
    # NOTE(review): confirm whether each entry was meant to be checked.
    for _key, vectors in utility.items():
        total = sum(sum(vector) for vector in vectors)

    accepted_ranges = ((-9.76, -9.70), (246.5, 248.5), (0, 1))
    assert any(low < total < high for low, high in accepted_ranges)

# Run the test only when this file is executed directly as a script, so that
# merely importing the module (for example while a test runner collects it)
# does not execute the test as an import side effect.
if __name__ == "__main__":
    test_pomdp_value_iteration()