qLearning.py
import numpy as np
import random

# Sample Q-learning code: tabular Q-learning on a small grid world.

# Hyperparameters
gamma = 0.9    # Discount factor
alpha = 0.1    # Learning rate
epsilon = 0.1  # Exploration rate

# Define the environment: a 4x4 grid world
grid_size = 4
goal_state = (3, 3)

# Initialize the Q-table: one action-value per (row, col, action)
q_table = np.zeros((grid_size, grid_size, 4))  # 4 actions: up, down, left, right

# Actions and their (row, col) offsets
actions = ['up', 'down', 'left', 'right']
action_dict = {
    'up': (-1, 0),
    'down': (1, 0),
    'left': (0, -1),
    'right': (0, 1)
}

# Choose an action using the epsilon-greedy strategy
def choose_action(state):
    if random.uniform(0, 1) < epsilon:
        return random.choice(actions)               # Explore: random action
    else:
        return actions[np.argmax(q_table[state])]   # Exploit: best known action

# Get the next state for a state/action pair, clamped to the grid boundaries
def next_state(state, action):
    row, col = state
    row_change, col_change = action_dict[action]
    next_row = min(max(row + row_change, 0), grid_size - 1)
    next_col = min(max(col + col_change, 0), grid_size - 1)
    return next_row, next_col

# Apply the Q-learning update:
# Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))
def update_q_value(state, action, reward, next_s):
    action_idx = actions.index(action)
    best_next_value = np.max(q_table[next_s])
    q_table[state][action_idx] = ((1 - alpha) * q_table[state][action_idx]
                                  + alpha * (reward + gamma * best_next_value))

# Run the Q-learning algorithm for a given number of episodes
def q_learning(episodes):
    for episode in range(episodes):
        state = (0, 0)  # Start state
        while state != goal_state:
            action = choose_action(state)
            next_s = next_state(state, action)
            # Reward of 1 for reaching the goal, small step penalty otherwise
            reward = 1 if next_s == goal_state else -0.1
            update_q_value(state, action, reward, next_s)
            state = next_s

# Run the Q-learning algorithm, then print the learned Q-table
q_learning(1000)

print("Learned Q-Table:")
for row in range(grid_size):
    for col in range(grid_size):
        print(f"State ({row}, {col}): {q_table[row, col]}")