# coding: utf-8
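"""Plot the performance landscape around a sequence of saved policies.

The script loads every policy saved in ./Models/, evaluates each one, then
evaluates the perturbed parameters theta0 +/- alpha * d along the normalized
direction d joining two successive policies, and stacks the resulting score
lines into a single image via SavedGradient.
"""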
import os
import numpy as np
import matplotlib.pyplot as plt
import argparse
import pickle
import lzma
import gym
from progress.bar import Bar
from savedGradient import SavedGradient
from slowBar import SlowBar
from vector_util import *
# import torch
from chrono import Chrono
from simu import make_simu_from_params
from policies import GenericNet, BernoulliPolicy, NormalPolicy, SquashedGaussianPolicy, DiscretePolicy, PolicyWrapper
from arguments import get_args
from numpy.random import random
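# Note: savedGradient, slowBar, vector_util, chrono, simu, policies and
# arguments appear to be local modules of this repository, not pip packages.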


def evaluate_policy(params, env, weights):
    """
    Evaluate a policy given by its weight vector: run params.nb_evals
    episodes and return the average total reward.
    """
    policy = NormalPolicy(env.observation_space.shape[0], 24, 36, 1, params.lr_actor)
    policy.set_weights(weights)
    average_tot_score = 0
    for _ in range(int(params.nb_evals)):
        state = env.reset()
        total_reward = 0
        for _ in range(params.max_episode_steps):
            action = policy.select_action(state, params.deterministic_eval)
            next_state, reward, done, _ = env.step(action)
            total_reward += reward
            state = next_state
            if done:
                break
        # Count the episode even when it hits the step limit without 'done'
        average_tot_score += total_reward / params.nb_evals
    return average_tot_score
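# Note: NormalPolicy(obs_dim, 24, 36, 1, lr_actor) above is assumed to build a
# Gaussian policy with two hidden layers (24 and 36 units) and a 1-dimensional
# action output, so this script targets environments with a single continuous
# action dimension.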


def load_policies(folder):
    """
    :param folder: path of the folder containing the saved policies
    :return: the list of the weight vectors of the stored policies
    """
    policies = []
    for policy_file in os.listdir(folder):
        # PolicyWrapper.load is assumed to rebuild the policy network from the
        # saved file; only its flat weight vector is kept here.
        pw = PolicyWrapper(GenericNet(), 0, "", "", "", 0)
        policy, _ = pw.load(folder + policy_file)
        policies.append(policy.get_weights())
    return policies


if __name__ == '__main__':
    args = get_args()
    print(args)

    env = gym.make(args.env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = int(env.action_space.high[0])

    # Load all the policies stored in ./Models/
    directory = os.getcwd() + '/Models/'
    policies = load_policies(directory)
    if len(np.shape(policies)) > 1:
        # Several weight vectors were found: start from the first one
        theta0 = policies[0]
    else:
        # A single flat weight vector was returned: wrap it in a list
        theta0 = policies
        policies = [policies]
    num_params = len(theta0)
    base_vect = theta0

    D = getDirectionsMuller(args.nb_lines, num_params)
    D = order_all_by_proximity(D)
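    # getDirectionsMuller is assumed to draw args.nb_lines random unit vectors
    # of dimension num_params (Muller's sphere-sampling trick), and
    # order_all_by_proximity to reorder them so that neighbouring vectors are
    # similar. Note that D is not used below: the plotted directions are the
    # normalized differences between successive policies.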
    # Compute the fitness along the direction between successive policies
    previous_theta = None  # remembers the previous policy's parameters
    newGradient = SavedGradient(directions=[], results=[], red_markers=[], green_markers=[],
                                nbLines=args.line_height, pixelWidth=args.pixelWidth,
                                pixelHeight=args.pixelHeight, maxValue=args.maxValue,
                                dotText=args.dotText, dotWidth=args.dotWidth,
                                xMargin=args.xMargin, yMargin=int(args.pixelHeight / 2))
    for policy in policies:
        filename = args.filename + ".png"
        # Get the new parameters and their score
        theta0 = policy
        init_score = evaluate_policy(args, env, policy)
        # Direction taken since the previous policy
        base_vect = theta0 if previous_theta is None else theta0 - previous_theta
        previous_theta = theta0
        print("Loaded parameters")
        # Normalize the direction (d = 0 if the policy did not move)
        length_dist = euclidienne(base_vect, np.zeros(np.shape(base_vect)))
        d = np.zeros(np.shape(base_vect)) if length_dist == 0 else base_vect / length_dist
        newGradient.directions.append(d)
        # New parameters following the direction
        theta_plus, theta_minus = getPointsDirection(theta0, num_params, args.minalpha,
                                                     args.maxalpha, args.stepalpha, d)
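        # getPointsDirection is assumed to return the two lists of parameter
        # vectors theta0 + alpha * d and theta0 - alpha * d, for alpha ranging
        # from args.minalpha to args.maxalpha with step args.stepalpha.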
        # Evaluate the performance on both sides of the direction
        scores_plus, scores_minus = [], []
        with SlowBar('Evaluating along the direction', max=len(theta_plus)) as bar:
            for param_i in range(len(theta_plus)):
                # Go forward in the direction and get the new performance
                scores_plus.append(evaluate_policy(args, env, theta_plus[param_i]))
                # Go backward in the direction and get the new performance
                scores_minus.append(evaluate_policy(args, env, theta_minus[param_i]))
                bar.next()
        # Assemble the line: backward scores (reversed), the policy's own
        # score in the middle, then the forward scores
        scores_minus = scores_minus[::-1]
        line = scores_minus + [init_score] + scores_plus
        # Mark two consecutive positions on the line: the current policy sits
        # at the center, the previous one length_dist / stepalpha steps to its left
        last_params_marker = int(length_dist / args.stepalpha)
        marker_actor = int((len(line) - 1) / 2)
        marker_last = max(marker_actor - last_params_marker, 0)
        # Store the markers: the previous policy is shown in red, the current one in green
        newGradient.red_markers.append(marker_last)
        newGradient.green_markers.append(marker_actor)
        # Putting it all together
        newGradient.results.append(line)
    try:
        # Assemble the image and save it if asked
        newGradient.computeImage(saveImage=args.saveImage, filename=filename, directory=args.directoryImage)
    except Exception as e:
        # If the image cannot be computed, at least save the raw data
        print("Failed to compute the image:", e)
        newGradient.saveGradient(filename=filename, directory=args.directoryFileGrad)

    # Save the SavedGradient if asked
    if args.saveFile:
        newGradient.saveGradient(filename=filename, directory=args.directoryFileGrad)

    env.close()
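# Typical invocation (flag names inferred from the args.* fields used above;
# the environment name is an assumption and must expose a 1-D continuous action):
#   python main_gradient.py --env_name Pendulum-v0 --nb_lines 10 \
#       --minalpha 0.0 --maxalpha 10 --stepalpha 0.25 --filename gradient_study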