DeepQLearning/QLearningParameters.py at master · itej89/DeepQLearning · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

from collections import deque


class QLearningParameters:
    """Central collection of settings for the deep Q-learning agent.

    Every value is a class attribute, so it can be read directly from the
    class without creating an instance.

    Reference (tabular) Q-learning update that the network approximates:

        Q(s, a) = (1 - learningRate) * Q(s, a)
                  + learningRate * (reward + gamma * max(Q(s', a)))
    """

    # ------------------------------------------------------------------
    # Q-learning hyperparameters
    # ------------------------------------------------------------------
    # Discount rate applied to future rewards.
    GAMMA = 0.95

    # Exploration vs. exploitation: probability of choosing a random
    # action instead of following the currently learned policy.
    EPSILON = 1
    # Decay value and lower bound associated with EPSILON.
    EPSILON_DECAY_VALUE = 0.95
    EPSILON_MIN_VALUE = 0.1

    # ------------------------------------------------------------------
    # Neural-network hyperparameters
    # ------------------------------------------------------------------
    # Learning rate.
    LEARNING_RATE = 0.01

    # Batch size used to train the neural network in each epoch.
    BATCH_SIZE = 32

    # ------------------------------------------------------------------
    # Update-loop parameters
    # ------------------------------------------------------------------
    # Total number of training episodes for the agent to explore.
    TOTAL_EPISODES = 100

    # Maximum number of steps the agent may take before exiting the
    # environment.
    MAX_STEPS_PER_EPISODE = 200

    # Save the model every this many episodes.
    SAVE_MODEL_FOR_STEPS = 20

    # Render the simulator every this many episodes.
    RENDER_EVERY = 50

    # Path under which models are saved.
    MODEL_PATH_SAVE = "./Models/Cartpole-V0"

    # ------------------------------------------------------------------
    # Experience storage
    # ------------------------------------------------------------------
    # Bounded buffer for the states generated by the agent while it
    # navigates the environment through different actions. Defined at
    # class level, so one buffer object is shared by all users of the
    # class; once 20000 entries are held, appending discards entries
    # from the opposite end.
    replayBuffer = deque(maxlen=20_000)